1 //======- CFLGraph.h - Abstract stratified sets implementation. --------======//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
11 /// alias analysis.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_ANALYSIS_CFLGRAPH_H
16 #define LLVM_ANALYSIS_CFLGRAPH_H
17 
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Analysis/MemoryBuiltins.h"
21 #include "llvm/IR/InstVisitor.h"
22 #include "llvm/IR/Instructions.h"
23 
24 namespace llvm {
25 namespace cflaa {
26 
27 /// \brief The Program Expression Graph (PEG) of CFL analysis
28 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
29 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
30 /// the main purpose of this graph is to abstract away unrelated facts and
31 /// translate the rest into a form that can be easily digested by CFL analyses.
32 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
33 /// pointer assignment between InstantiatedValue. Pointer
34 /// references/dereferences are not explicitly stored in the graph: we
35 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
36 /// I+1) and a reference edge to (X, I-1).
37 class CFLGraph {
38 public:
39   typedef InstantiatedValue Node;
40 
41   struct Edge {
42     Node Other;
43   };
44 
45   typedef std::vector<Edge> EdgeList;
46 
47   struct NodeInfo {
48     EdgeList Edges;
49     AliasAttrs Attr;
50   };
51 
52   class ValueInfo {
53     std::vector<NodeInfo> Levels;
54 
55   public:
addNodeToLevel(unsigned Level)56     bool addNodeToLevel(unsigned Level) {
57       auto NumLevels = Levels.size();
58       if (NumLevels > Level)
59         return false;
60       Levels.resize(Level + 1);
61       return true;
62     }
63 
getNodeInfoAtLevel(unsigned Level)64     NodeInfo &getNodeInfoAtLevel(unsigned Level) {
65       assert(Level < Levels.size());
66       return Levels[Level];
67     }
getNodeInfoAtLevel(unsigned Level)68     const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
69       assert(Level < Levels.size());
70       return Levels[Level];
71     }
72 
getNumLevels()73     unsigned getNumLevels() const { return Levels.size(); }
74   };
75 
76 private:
77   typedef DenseMap<Value *, ValueInfo> ValueMap;
78   ValueMap ValueImpls;
79 
getNode(Node N)80   const NodeInfo *getNode(Node N) const {
81     auto Itr = ValueImpls.find(N.Val);
82     if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
83       return nullptr;
84     return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
85   }
getNode(Node N)86   NodeInfo *getNode(Node N) {
87     auto Itr = ValueImpls.find(N.Val);
88     if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
89       return nullptr;
90     return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
91   }
92 
93 public:
94   typedef ValueMap::const_iterator const_value_iterator;
95 
96   bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
97     assert(N.Val != nullptr);
98     auto &ValInfo = ValueImpls[N.Val];
99     auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
100     ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
101     return Changed;
102   }
103 
addAttr(Node N,AliasAttrs Attr)104   void addAttr(Node N, AliasAttrs Attr) {
105     auto *Info = getNode(N);
106     assert(Info != nullptr);
107     Info->Attr |= Attr;
108   }
109 
110   void addEdge(Node From, Node To, int64_t Offset = 0) {
111     assert(getNode(To) != nullptr);
112 
113     auto *FromInfo = getNode(From);
114     assert(FromInfo != nullptr);
115     FromInfo->Edges.push_back(Edge{To});
116   }
117 
attrFor(Node N)118   AliasAttrs attrFor(Node N) const {
119     auto *Info = getNode(N);
120     assert(Info != nullptr);
121     return Info->Attr;
122   }
123 
value_mappings()124   iterator_range<const_value_iterator> value_mappings() const {
125     return make_range<const_value_iterator>(ValueImpls.begin(),
126                                             ValueImpls.end());
127   }
128 };
129 
130 ///\brief A builder class used to create CFLGraph instance from a given function
131 /// The CFL-AA that uses this builder must provide its own type as a template
132 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
133 /// needs a way of obtaining the summary of other functions when callinsts are
134 /// encountered.
135 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
136 /// member function that takes a Function& and returns the corresponding summary
137 /// as a const AliasSummary*.
138 template <typename CFLAA> class CFLGraphBuilder {
139   // Input of the builder
140   CFLAA &Analysis;
141   const TargetLibraryInfo &TLI;
142 
143   // Output of the builder
144   CFLGraph Graph;
145   SmallVector<Value *, 4> ReturnedValues;
146 
147   // Helper class
148   /// Gets the edges our graph should have, based on an Instruction*
149   class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
150     CFLAA &AA;
151     const TargetLibraryInfo &TLI;
152 
153     CFLGraph &Graph;
154     SmallVectorImpl<Value *> &ReturnValues;
155 
hasUsefulEdges(ConstantExpr * CE)156     static bool hasUsefulEdges(ConstantExpr *CE) {
157       // ConstantExpr doesn't have terminators, invokes, or fences, so only
158       // needs
159       // to check for compares.
160       return CE->getOpcode() != Instruction::ICmp &&
161              CE->getOpcode() != Instruction::FCmp;
162     }
163 
164     // Returns possible functions called by CS into the given SmallVectorImpl.
165     // Returns true if targets found, false otherwise.
getPossibleTargets(CallSite CS,SmallVectorImpl<Function * > & Output)166     static bool getPossibleTargets(CallSite CS,
167                                    SmallVectorImpl<Function *> &Output) {
168       if (auto *Fn = CS.getCalledFunction()) {
169         Output.push_back(Fn);
170         return true;
171       }
172 
173       // TODO: If the call is indirect, we might be able to enumerate all
174       // potential
175       // targets of the call and return them, rather than just failing.
176       return false;
177     }
178 
179     void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
180       assert(Val != nullptr && Val->getType()->isPointerTy());
181       if (auto GVal = dyn_cast<GlobalValue>(Val)) {
182         if (Graph.addNode(InstantiatedValue{GVal, 0},
183                           getGlobalOrArgAttrFromValue(*GVal)))
184           Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
185       } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
186         if (hasUsefulEdges(CExpr)) {
187           if (Graph.addNode(InstantiatedValue{CExpr, 0}))
188             visitConstantExpr(CExpr);
189         }
190       } else
191         Graph.addNode(InstantiatedValue{Val, 0}, Attr);
192     }
193 
194     void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
195       assert(From != nullptr && To != nullptr);
196       if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
197         return;
198       addNode(From);
199       if (To != From) {
200         addNode(To);
201         Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
202                       Offset);
203       }
204     }
205 
addDerefEdge(Value * From,Value * To)206     void addDerefEdge(Value *From, Value *To) {
207       assert(From != nullptr && To != nullptr);
208       if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
209         return;
210       addNode(From);
211       addNode(To);
212       Graph.addNode(InstantiatedValue{From, 1});
213       Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
214     }
215 
216   public:
GetEdgesVisitor(CFLGraphBuilder & Builder)217     GetEdgesVisitor(CFLGraphBuilder &Builder)
218         : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
219           ReturnValues(Builder.ReturnedValues) {}
220 
visitInstruction(Instruction &)221     void visitInstruction(Instruction &) {
222       llvm_unreachable("Unsupported instruction encountered");
223     }
224 
visitReturnInst(ReturnInst & Inst)225     void visitReturnInst(ReturnInst &Inst) {
226       if (auto RetVal = Inst.getReturnValue()) {
227         if (RetVal->getType()->isPointerTy()) {
228           addNode(RetVal);
229           ReturnValues.push_back(RetVal);
230         }
231       }
232     }
233 
visitPtrToIntInst(PtrToIntInst & Inst)234     void visitPtrToIntInst(PtrToIntInst &Inst) {
235       auto *Ptr = Inst.getOperand(0);
236       addNode(Ptr, getAttrEscaped());
237     }
238 
visitIntToPtrInst(IntToPtrInst & Inst)239     void visitIntToPtrInst(IntToPtrInst &Inst) {
240       auto *Ptr = &Inst;
241       addNode(Ptr, getAttrUnknown());
242     }
243 
visitCastInst(CastInst & Inst)244     void visitCastInst(CastInst &Inst) {
245       auto *Src = Inst.getOperand(0);
246       addAssignEdge(Src, &Inst);
247     }
248 
visitBinaryOperator(BinaryOperator & Inst)249     void visitBinaryOperator(BinaryOperator &Inst) {
250       auto *Op1 = Inst.getOperand(0);
251       auto *Op2 = Inst.getOperand(1);
252       addAssignEdge(Op1, &Inst);
253       addAssignEdge(Op2, &Inst);
254     }
255 
visitAtomicCmpXchgInst(AtomicCmpXchgInst & Inst)256     void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
257       auto *Ptr = Inst.getPointerOperand();
258       auto *Val = Inst.getNewValOperand();
259       addDerefEdge(Ptr, Val);
260     }
261 
visitAtomicRMWInst(AtomicRMWInst & Inst)262     void visitAtomicRMWInst(AtomicRMWInst &Inst) {
263       auto *Ptr = Inst.getPointerOperand();
264       auto *Val = Inst.getValOperand();
265       addDerefEdge(Ptr, Val);
266     }
267 
visitPHINode(PHINode & Inst)268     void visitPHINode(PHINode &Inst) {
269       for (Value *Val : Inst.incoming_values())
270         addAssignEdge(Val, &Inst);
271     }
272 
visitGetElementPtrInst(GetElementPtrInst & Inst)273     void visitGetElementPtrInst(GetElementPtrInst &Inst) {
274       auto *Op = Inst.getPointerOperand();
275       addAssignEdge(Op, &Inst);
276     }
277 
visitSelectInst(SelectInst & Inst)278     void visitSelectInst(SelectInst &Inst) {
279       // Condition is not processed here (The actual statement producing
280       // the condition result is processed elsewhere). For select, the
281       // condition is evaluated, but not loaded, stored, or assigned
282       // simply as a result of being the condition of a select.
283 
284       auto *TrueVal = Inst.getTrueValue();
285       auto *FalseVal = Inst.getFalseValue();
286       addAssignEdge(TrueVal, &Inst);
287       addAssignEdge(FalseVal, &Inst);
288     }
289 
visitAllocaInst(AllocaInst & Inst)290     void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
291 
visitLoadInst(LoadInst & Inst)292     void visitLoadInst(LoadInst &Inst) {
293       auto *Ptr = Inst.getPointerOperand();
294       auto *Val = &Inst;
295       addDerefEdge(Ptr, Val);
296     }
297 
visitStoreInst(StoreInst & Inst)298     void visitStoreInst(StoreInst &Inst) {
299       auto *Ptr = Inst.getPointerOperand();
300       auto *Val = Inst.getValueOperand();
301       addDerefEdge(Ptr, Val);
302     }
303 
visitVAArgInst(VAArgInst & Inst)304     void visitVAArgInst(VAArgInst &Inst) {
305       // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
306       // does
307       // two things:
308       //  1. Loads a value from *((T*)*Ptr).
309       //  2. Increments (stores to) *Ptr by some target-specific amount.
310       // For now, we'll handle this like a landingpad instruction (by placing
311       // the
312       // result in its own group, and having that group alias externals).
313       addNode(&Inst, getAttrUnknown());
314     }
315 
isFunctionExternal(Function * Fn)316     static bool isFunctionExternal(Function *Fn) {
317       return !Fn->hasExactDefinition();
318     }
319 
tryInterproceduralAnalysis(CallSite CS,const SmallVectorImpl<Function * > & Fns)320     bool tryInterproceduralAnalysis(CallSite CS,
321                                     const SmallVectorImpl<Function *> &Fns) {
322       assert(Fns.size() > 0);
323 
324       if (CS.arg_size() > MaxSupportedArgsInSummary)
325         return false;
326 
327       // Exit early if we'll fail anyway
328       for (auto *Fn : Fns) {
329         if (isFunctionExternal(Fn) || Fn->isVarArg())
330           return false;
331         // Fail if the caller does not provide enough arguments
332         assert(Fn->arg_size() <= CS.arg_size());
333         if (!AA.getAliasSummary(*Fn))
334           return false;
335       }
336 
337       for (auto *Fn : Fns) {
338         auto Summary = AA.getAliasSummary(*Fn);
339         assert(Summary != nullptr);
340 
341         auto &RetParamRelations = Summary->RetParamRelations;
342         for (auto &Relation : RetParamRelations) {
343           auto IRelation = instantiateExternalRelation(Relation, CS);
344           if (IRelation.hasValue()) {
345             Graph.addNode(IRelation->From);
346             Graph.addNode(IRelation->To);
347             Graph.addEdge(IRelation->From, IRelation->To);
348           }
349         }
350 
351         auto &RetParamAttributes = Summary->RetParamAttributes;
352         for (auto &Attribute : RetParamAttributes) {
353           auto IAttr = instantiateExternalAttribute(Attribute, CS);
354           if (IAttr.hasValue())
355             Graph.addNode(IAttr->IValue, IAttr->Attr);
356         }
357       }
358 
359       return true;
360     }
361 
visitCallSite(CallSite CS)362     void visitCallSite(CallSite CS) {
363       auto Inst = CS.getInstruction();
364 
365       // Make sure all arguments and return value are added to the graph first
366       for (Value *V : CS.args())
367         if (V->getType()->isPointerTy())
368           addNode(V);
369       if (Inst->getType()->isPointerTy())
370         addNode(Inst);
371 
372       // Check if Inst is a call to a library function that
373       // allocates/deallocates
374       // on the heap. Those kinds of functions do not introduce any aliases.
375       // TODO: address other common library functions such as realloc(),
376       // strdup(),
377       // etc.
378       if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
379           isFreeCall(Inst, &TLI))
380         return;
381 
382       // TODO: Add support for noalias args/all the other fun function
383       // attributes
384       // that we can tack on.
385       SmallVector<Function *, 4> Targets;
386       if (getPossibleTargets(CS, Targets))
387         if (tryInterproceduralAnalysis(CS, Targets))
388           return;
389 
390       // Because the function is opaque, we need to note that anything
391       // could have happened to the arguments (unless the function is marked
392       // readonly or readnone), and that the result could alias just about
393       // anything, too (unless the result is marked noalias).
394       if (!CS.onlyReadsMemory())
395         for (Value *V : CS.args()) {
396           if (V->getType()->isPointerTy()) {
397             // The argument itself escapes.
398             Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
399             // The fate of argument memory is unknown. Note that since
400             // AliasAttrs is transitive with respect to dereference, we only
401             // need to specify it for the first-level memory.
402             Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
403           }
404         }
405 
406       if (Inst->getType()->isPointerTy()) {
407         auto *Fn = CS.getCalledFunction();
408         if (Fn == nullptr || !Fn->doesNotAlias(0))
409           // No need to call addNode() since we've added Inst at the
410           // beginning of this function and we know it is not a global.
411           Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
412       }
413     }
414 
415     /// Because vectors/aggregates are immutable and unaddressable, there's
416     /// nothing we can do to coax a value out of them, other than calling
417     /// Extract{Element,Value}. We can effectively treat them as pointers to
418     /// arbitrary memory locations we can store in and load from.
visitExtractElementInst(ExtractElementInst & Inst)419     void visitExtractElementInst(ExtractElementInst &Inst) {
420       auto *Ptr = Inst.getVectorOperand();
421       auto *Val = &Inst;
422       addDerefEdge(Ptr, Val);
423     }
424 
visitInsertElementInst(InsertElementInst & Inst)425     void visitInsertElementInst(InsertElementInst &Inst) {
426       auto *Vec = Inst.getOperand(0);
427       auto *Val = Inst.getOperand(1);
428       addAssignEdge(Vec, &Inst);
429       addDerefEdge(&Inst, Val);
430     }
431 
visitLandingPadInst(LandingPadInst & Inst)432     void visitLandingPadInst(LandingPadInst &Inst) {
433       // Exceptions come from "nowhere", from our analysis' perspective.
434       // So we place the instruction its own group, noting that said group may
435       // alias externals
436       addNode(&Inst, getAttrUnknown());
437     }
438 
visitInsertValueInst(InsertValueInst & Inst)439     void visitInsertValueInst(InsertValueInst &Inst) {
440       auto *Agg = Inst.getOperand(0);
441       auto *Val = Inst.getOperand(1);
442       addAssignEdge(Agg, &Inst);
443       addDerefEdge(&Inst, Val);
444     }
445 
visitExtractValueInst(ExtractValueInst & Inst)446     void visitExtractValueInst(ExtractValueInst &Inst) {
447       auto *Ptr = Inst.getAggregateOperand();
448       addDerefEdge(Ptr, &Inst);
449     }
450 
visitShuffleVectorInst(ShuffleVectorInst & Inst)451     void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
452       auto *From1 = Inst.getOperand(0);
453       auto *From2 = Inst.getOperand(1);
454       addAssignEdge(From1, &Inst);
455       addAssignEdge(From2, &Inst);
456     }
457 
visitConstantExpr(ConstantExpr * CE)458     void visitConstantExpr(ConstantExpr *CE) {
459       switch (CE->getOpcode()) {
460       default:
461         llvm_unreachable("Unknown instruction type encountered!");
462 // Build the switch statement using the Instruction.def file.
463 #define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
464   case Instruction::OPCODE:                                                    \
465     this->visit##OPCODE(*(CLASS *)CE);                                         \
466     break;
467 #include "llvm/IR/Instruction.def"
468       }
469     }
470   };
471 
472   // Helper functions
473 
474   // Determines whether or not we an instruction is useless to us (e.g.
475   // FenceInst)
hasUsefulEdges(Instruction * Inst)476   static bool hasUsefulEdges(Instruction *Inst) {
477     bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
478                                     !isa<InvokeInst>(Inst) &&
479                                     !isa<ReturnInst>(Inst);
480     return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
481            !IsNonInvokeRetTerminator;
482   }
483 
addArgumentToGraph(Argument & Arg)484   void addArgumentToGraph(Argument &Arg) {
485     if (Arg.getType()->isPointerTy()) {
486       Graph.addNode(InstantiatedValue{&Arg, 0},
487                     getGlobalOrArgAttrFromValue(Arg));
488       // Pointees of a formal parameter is known to the caller
489       Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
490     }
491   }
492 
493   // Given an Instruction, this will add it to the graph, along with any
494   // Instructions that are potentially only available from said Instruction
495   // For example, given the following line:
496   //   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
497   // addInstructionToGraph would add both the `load` and `getelementptr`
498   // instructions to the graph appropriately.
addInstructionToGraph(GetEdgesVisitor & Visitor,Instruction & Inst)499   void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
500     if (!hasUsefulEdges(&Inst))
501       return;
502 
503     Visitor.visit(Inst);
504   }
505 
506   // Builds the graph needed for constructing the StratifiedSets for the given
507   // function
buildGraphFrom(Function & Fn)508   void buildGraphFrom(Function &Fn) {
509     GetEdgesVisitor Visitor(*this);
510 
511     for (auto &Bb : Fn.getBasicBlockList())
512       for (auto &Inst : Bb.getInstList())
513         addInstructionToGraph(Visitor, Inst);
514 
515     for (auto &Arg : Fn.args())
516       addArgumentToGraph(Arg);
517   }
518 
519 public:
CFLGraphBuilder(CFLAA & Analysis,const TargetLibraryInfo & TLI,Function & Fn)520   CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
521       : Analysis(Analysis), TLI(TLI) {
522     buildGraphFrom(Fn);
523   }
524 
getCFLGraph()525   const CFLGraph &getCFLGraph() const { return Graph; }
getReturnValues()526   const SmallVector<Value *, 4> &getReturnValues() const {
527     return ReturnedValues;
528   }
529 };
530 }
531 }
532 
533 #endif
534