1 //===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_ADT_STRATIFIEDSETS_H
11 #define LLVM_ADT_STRATIFIEDSETS_H
12 
13 #include "AliasAnalysisSummary.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include <bitset>
19 #include <cassert>
20 #include <cmath>
21 #include <type_traits>
22 #include <utility>
23 #include <vector>
24 
25 namespace llvm {
26 namespace cflaa {
27 /// An index into Stratified Sets.
28 typedef unsigned StratifiedIndex;
29 /// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
30 /// ~1M sets exist.
31 
32 // Container of information related to a value in a StratifiedSet.
33 struct StratifiedInfo {
34   StratifiedIndex Index;
35   /// For field sensitivity, etc. we can tack fields on here.
36 };
37 
38 /// A "link" between two StratifiedSets.
39 struct StratifiedLink {
40   /// This is a value used to signify "does not exist" where the
41   /// StratifiedIndex type is used.
42   ///
43   /// This is used instead of Optional<StratifiedIndex> because
44   /// Optional<StratifiedIndex> would eat up a considerable amount of extra
45   /// memory, after struct padding/alignment is taken into account.
46   static const StratifiedIndex SetSentinel;
47 
48   /// The index for the set "above" current
49   StratifiedIndex Above;
50 
51   /// The link for the set "below" current
52   StratifiedIndex Below;
53 
54   /// Attributes for these StratifiedSets.
55   AliasAttrs Attrs;
56 
StratifiedLinkStratifiedLink57   StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
58 
hasBelowStratifiedLink59   bool hasBelow() const { return Below != SetSentinel; }
hasAboveStratifiedLink60   bool hasAbove() const { return Above != SetSentinel; }
61 
clearBelowStratifiedLink62   void clearBelow() { Below = SetSentinel; }
clearAboveStratifiedLink63   void clearAbove() { Above = SetSentinel; }
64 };
65 
66 /// These are stratified sets, as described in "Fast algorithms for
67 /// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
68 /// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
69 /// of Value*s. If two Value*s are in the same set, or if both sets have
70 /// overlapping attributes, then the Value*s are said to alias.
71 ///
72 /// Sets may be related by position, meaning that one set may be considered as
73 /// above or below another. In CFL Alias Analysis, this gives us an indication
74 /// of how two variables are related; if the set of variable A is below a set
75 /// containing variable B, then at some point, a variable that has interacted
76 /// with B (or B itself) was either used in order to extract the variable A, or
77 /// was used as storage of variable A.
78 ///
79 /// Sets may also have attributes (as noted above). These attributes are
80 /// generally used for noting whether a variable in the set has interacted with
81 /// a variable whose origins we don't quite know (i.e. globals/arguments), or if
82 /// the variable may have had operations performed on it (modified in a function
83 /// call). All attributes that exist in a set A must exist in all sets marked as
84 /// below set A.
85 template <typename T> class StratifiedSets {
86 public:
87   StratifiedSets() = default;
88   StratifiedSets(StratifiedSets &&) = default;
89   StratifiedSets &operator=(StratifiedSets &&) = default;
90 
StratifiedSets(DenseMap<T,StratifiedInfo> Map,std::vector<StratifiedLink> Links)91   StratifiedSets(DenseMap<T, StratifiedInfo> Map,
92                  std::vector<StratifiedLink> Links)
93       : Values(std::move(Map)), Links(std::move(Links)) {}
94 
find(const T & Elem)95   Optional<StratifiedInfo> find(const T &Elem) const {
96     auto Iter = Values.find(Elem);
97     if (Iter == Values.end())
98       return None;
99     return Iter->second;
100   }
101 
getLink(StratifiedIndex Index)102   const StratifiedLink &getLink(StratifiedIndex Index) const {
103     assert(inbounds(Index));
104     return Links[Index];
105   }
106 
107 private:
108   DenseMap<T, StratifiedInfo> Values;
109   std::vector<StratifiedLink> Links;
110 
inbounds(StratifiedIndex Idx)111   bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
112 };
113 
114 /// Generic Builder class that produces StratifiedSets instances.
115 ///
116 /// The goal of this builder is to efficiently produce correct StratifiedSets
117 /// instances. To this end, we use a few tricks:
118 ///   > Set chains (A method for linking sets together)
119 ///   > Set remaps (A method for marking a set as an alias [irony?] of another)
120 ///
121 /// ==== Set chains ====
122 /// This builder has a notion of some value A being above, below, or with some
123 /// other value B:
124 ///   > The `A above B` relationship implies that there is a reference edge
125 ///   going from A to B. Namely, it notes that A can store anything in B's set.
126 ///   > The `A below B` relationship is the opposite of `A above B`. It implies
127 ///   that there's a dereference edge going from A to B.
128 ///   > The `A with B` relationship states that there's an assignment edge going
129 ///   from A to B, and that A and B should be treated as equals.
130 ///
131 /// As an example, take the following code snippet:
132 ///
133 /// %a = alloca i32, align 4
134 /// %ap = alloca i32*, align 8
135 /// %app = alloca i32**, align 8
136 /// store %a, %ap
137 /// store %ap, %app
138 /// %aw = getelementptr %ap, i32 0
139 ///
140 /// Given this, the following relations exist:
141 ///   - %a below %ap & %ap above %a
142 ///   - %ap below %app & %app above %ap
143 ///   - %aw with %ap & %ap with %aw
144 ///
145 /// These relations produce the following sets:
146 ///   [{%a}, {%ap, %aw}, {%app}]
147 ///
148 /// ...Which state that the only MayAlias relationship in the above program is
149 /// between %ap and %aw.
150 ///
151 /// Because LLVM allows arbitrary casts, code like the following needs to be
152 /// supported:
153 ///   %ip = alloca i64, align 8
154 ///   %ipp = alloca i64*, align 8
155 ///   %i = bitcast i64** ipp to i64
156 ///   store i64* %ip, i64** %ipp
157 ///   store i64 %i, i64* %ip
158 ///
159 /// Which, because %ipp ends up *both* above and below %ip, is fun.
160 ///
161 /// This is solved by merging %i and %ipp into a single set (...which is the
162 /// only way to solve this, since their bit patterns are equivalent). Any sets
163 /// that ended up in between %i and %ipp at the time of merging (in this case,
164 /// the set containing %ip) also get conservatively merged into the set of %i
165 /// and %ipp. In short, the resulting StratifiedSet from the above code would be
166 /// {%ip, %ipp, %i}.
167 ///
168 /// ==== Set remaps ====
169 /// More of an implementation detail than anything -- when merging sets, we need
170 /// to update the numbers of all of the elements mapped to those sets. Rather
171 /// than doing this at each merge, we note in the BuilderLink structure that a
172 /// remap has occurred, and use this information so we can defer renumbering set
173 /// elements until build time.
174 template <typename T> class StratifiedSetsBuilder {
175   /// Represents a Stratified Set, with information about the Stratified
176   /// Set above it, the set below it, and whether the current set has been
177   /// remapped to another.
178   struct BuilderLink {
179     const StratifiedIndex Number;
180 
BuilderLinkBuilderLink181     BuilderLink(StratifiedIndex N) : Number(N) {
182       Remap = StratifiedLink::SetSentinel;
183     }
184 
hasAboveBuilderLink185     bool hasAbove() const {
186       assert(!isRemapped());
187       return Link.hasAbove();
188     }
189 
hasBelowBuilderLink190     bool hasBelow() const {
191       assert(!isRemapped());
192       return Link.hasBelow();
193     }
194 
setBelowBuilderLink195     void setBelow(StratifiedIndex I) {
196       assert(!isRemapped());
197       Link.Below = I;
198     }
199 
setAboveBuilderLink200     void setAbove(StratifiedIndex I) {
201       assert(!isRemapped());
202       Link.Above = I;
203     }
204 
clearBelowBuilderLink205     void clearBelow() {
206       assert(!isRemapped());
207       Link.clearBelow();
208     }
209 
clearAboveBuilderLink210     void clearAbove() {
211       assert(!isRemapped());
212       Link.clearAbove();
213     }
214 
getBelowBuilderLink215     StratifiedIndex getBelow() const {
216       assert(!isRemapped());
217       assert(hasBelow());
218       return Link.Below;
219     }
220 
getAboveBuilderLink221     StratifiedIndex getAbove() const {
222       assert(!isRemapped());
223       assert(hasAbove());
224       return Link.Above;
225     }
226 
getAttrsBuilderLink227     AliasAttrs getAttrs() {
228       assert(!isRemapped());
229       return Link.Attrs;
230     }
231 
setAttrsBuilderLink232     void setAttrs(AliasAttrs Other) {
233       assert(!isRemapped());
234       Link.Attrs |= Other;
235     }
236 
isRemappedBuilderLink237     bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
238 
239     /// For initial remapping to another set
remapToBuilderLink240     void remapTo(StratifiedIndex Other) {
241       assert(!isRemapped());
242       Remap = Other;
243     }
244 
getRemapIndexBuilderLink245     StratifiedIndex getRemapIndex() const {
246       assert(isRemapped());
247       return Remap;
248     }
249 
250     /// Should only be called when we're already remapped.
updateRemapBuilderLink251     void updateRemap(StratifiedIndex Other) {
252       assert(isRemapped());
253       Remap = Other;
254     }
255 
256     /// Prefer the above functions to calling things directly on what's returned
257     /// from this -- they guard against unexpected calls when the current
258     /// BuilderLink is remapped.
getLinkBuilderLink259     const StratifiedLink &getLink() const { return Link; }
260 
261   private:
262     StratifiedLink Link;
263     StratifiedIndex Remap;
264   };
265 
266   /// This function performs all of the set unioning/value renumbering
267   /// that we've been putting off, and generates a vector<StratifiedLink> that
268   /// may be placed in a StratifiedSets instance.
finalizeSets(std::vector<StratifiedLink> & StratLinks)269   void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
270     DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
271     for (auto &Link : Links) {
272       if (Link.isRemapped())
273         continue;
274 
275       StratifiedIndex Number = StratLinks.size();
276       Remaps.insert(std::make_pair(Link.Number, Number));
277       StratLinks.push_back(Link.getLink());
278     }
279 
280     for (auto &Link : StratLinks) {
281       if (Link.hasAbove()) {
282         auto &Above = linksAt(Link.Above);
283         auto Iter = Remaps.find(Above.Number);
284         assert(Iter != Remaps.end());
285         Link.Above = Iter->second;
286       }
287 
288       if (Link.hasBelow()) {
289         auto &Below = linksAt(Link.Below);
290         auto Iter = Remaps.find(Below.Number);
291         assert(Iter != Remaps.end());
292         Link.Below = Iter->second;
293       }
294     }
295 
296     for (auto &Pair : Values) {
297       auto &Info = Pair.second;
298       auto &Link = linksAt(Info.Index);
299       auto Iter = Remaps.find(Link.Number);
300       assert(Iter != Remaps.end());
301       Info.Index = Iter->second;
302     }
303   }
304 
305   /// There's a guarantee in StratifiedLink where all bits set in a
306   /// Link.externals will be set in all Link.externals "below" it.
propagateAttrs(std::vector<StratifiedLink> & Links)307   static void propagateAttrs(std::vector<StratifiedLink> &Links) {
308     const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
309       const auto *Link = &Links[Idx];
310       while (Link->hasAbove()) {
311         Idx = Link->Above;
312         Link = &Links[Idx];
313       }
314       return Idx;
315     };
316 
317     SmallSet<StratifiedIndex, 16> Visited;
318     for (unsigned I = 0, E = Links.size(); I < E; ++I) {
319       auto CurrentIndex = getHighestParentAbove(I);
320       if (!Visited.insert(CurrentIndex).second)
321         continue;
322 
323       while (Links[CurrentIndex].hasBelow()) {
324         auto &CurrentBits = Links[CurrentIndex].Attrs;
325         auto NextIndex = Links[CurrentIndex].Below;
326         auto &NextBits = Links[NextIndex].Attrs;
327         NextBits |= CurrentBits;
328         CurrentIndex = NextIndex;
329       }
330     }
331   }
332 
333 public:
334   /// Builds a StratifiedSet from the information we've been given since either
335   /// construction or the prior build() call.
build()336   StratifiedSets<T> build() {
337     std::vector<StratifiedLink> StratLinks;
338     finalizeSets(StratLinks);
339     propagateAttrs(StratLinks);
340     Links.clear();
341     return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
342   }
343 
has(const T & Elem)344   bool has(const T &Elem) const { return get(Elem).hasValue(); }
345 
add(const T & Main)346   bool add(const T &Main) {
347     if (get(Main).hasValue())
348       return false;
349 
350     auto NewIndex = getNewUnlinkedIndex();
351     return addAtMerging(Main, NewIndex);
352   }
353 
354   /// Restructures the stratified sets as necessary to make "ToAdd" in a
355   /// set above "Main". There are some cases where this is not possible (see
356   /// above), so we merge them such that ToAdd and Main are in the same set.
addAbove(const T & Main,const T & ToAdd)357   bool addAbove(const T &Main, const T &ToAdd) {
358     assert(has(Main));
359     auto Index = *indexOf(Main);
360     if (!linksAt(Index).hasAbove())
361       addLinkAbove(Index);
362 
363     auto Above = linksAt(Index).getAbove();
364     return addAtMerging(ToAdd, Above);
365   }
366 
367   /// Restructures the stratified sets as necessary to make "ToAdd" in a
368   /// set below "Main". There are some cases where this is not possible (see
369   /// above), so we merge them such that ToAdd and Main are in the same set.
addBelow(const T & Main,const T & ToAdd)370   bool addBelow(const T &Main, const T &ToAdd) {
371     assert(has(Main));
372     auto Index = *indexOf(Main);
373     if (!linksAt(Index).hasBelow())
374       addLinkBelow(Index);
375 
376     auto Below = linksAt(Index).getBelow();
377     return addAtMerging(ToAdd, Below);
378   }
379 
addWith(const T & Main,const T & ToAdd)380   bool addWith(const T &Main, const T &ToAdd) {
381     assert(has(Main));
382     auto MainIndex = *indexOf(Main);
383     return addAtMerging(ToAdd, MainIndex);
384   }
385 
noteAttributes(const T & Main,AliasAttrs NewAttrs)386   void noteAttributes(const T &Main, AliasAttrs NewAttrs) {
387     assert(has(Main));
388     auto *Info = *get(Main);
389     auto &Link = linksAt(Info->Index);
390     Link.setAttrs(NewAttrs);
391   }
392 
393 private:
394   DenseMap<T, StratifiedInfo> Values;
395   std::vector<BuilderLink> Links;
396 
397   /// Adds the given element at the given index, merging sets if necessary.
addAtMerging(const T & ToAdd,StratifiedIndex Index)398   bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
399     StratifiedInfo Info = {Index};
400     auto Pair = Values.insert(std::make_pair(ToAdd, Info));
401     if (Pair.second)
402       return true;
403 
404     auto &Iter = Pair.first;
405     auto &IterSet = linksAt(Iter->second.Index);
406     auto &ReqSet = linksAt(Index);
407 
408     // Failed to add where we wanted to. Merge the sets.
409     if (&IterSet != &ReqSet)
410       merge(IterSet.Number, ReqSet.Number);
411 
412     return false;
413   }
414 
415   /// Gets the BuilderLink at the given index, taking set remapping into
416   /// account.
linksAt(StratifiedIndex Index)417   BuilderLink &linksAt(StratifiedIndex Index) {
418     auto *Start = &Links[Index];
419     if (!Start->isRemapped())
420       return *Start;
421 
422     auto *Current = Start;
423     while (Current->isRemapped())
424       Current = &Links[Current->getRemapIndex()];
425 
426     auto NewRemap = Current->Number;
427 
428     // Run through everything that has yet to be updated, and update them to
429     // remap to NewRemap
430     Current = Start;
431     while (Current->isRemapped()) {
432       auto *Next = &Links[Current->getRemapIndex()];
433       Current->updateRemap(NewRemap);
434       Current = Next;
435     }
436 
437     return *Current;
438   }
439 
440   /// Merges two sets into one another. Assumes that these sets are not
441   /// already one in the same.
merge(StratifiedIndex Idx1,StratifiedIndex Idx2)442   void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
443     assert(inbounds(Idx1) && inbounds(Idx2));
444     assert(&linksAt(Idx1) != &linksAt(Idx2) &&
445            "Merging a set into itself is not allowed");
446 
447     // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge
448     // both the
449     // given sets, and all sets between them, into one.
450     if (tryMergeUpwards(Idx1, Idx2))
451       return;
452 
453     if (tryMergeUpwards(Idx2, Idx1))
454       return;
455 
456     // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
457     // We therefore need to merge the two chains together.
458     mergeDirect(Idx1, Idx2);
459   }
460 
461   /// Merges two sets assuming that the set at `Idx1` is unreachable from
462   /// traversing above or below the set at `Idx2`.
mergeDirect(StratifiedIndex Idx1,StratifiedIndex Idx2)463   void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
464     assert(inbounds(Idx1) && inbounds(Idx2));
465 
466     auto *LinksInto = &linksAt(Idx1);
467     auto *LinksFrom = &linksAt(Idx2);
468     // Merging everything above LinksInto then proceeding to merge everything
469     // below LinksInto becomes problematic, so we go as far "up" as possible!
470     while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
471       LinksInto = &linksAt(LinksInto->getAbove());
472       LinksFrom = &linksAt(LinksFrom->getAbove());
473     }
474 
475     if (LinksFrom->hasAbove()) {
476       LinksInto->setAbove(LinksFrom->getAbove());
477       auto &NewAbove = linksAt(LinksInto->getAbove());
478       NewAbove.setBelow(LinksInto->Number);
479     }
480 
481     // Merging strategy:
482     //  > If neither has links below, stop.
483     //  > If only `LinksInto` has links below, stop.
484     //  > If only `LinksFrom` has links below, reset `LinksInto.Below` to
485     //  match `LinksFrom.Below`
486     //  > If both have links above, deal with those next.
487     while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
488       auto FromAttrs = LinksFrom->getAttrs();
489       LinksInto->setAttrs(FromAttrs);
490 
491       // Remap needs to happen after getBelow(), but before
492       // assignment of LinksFrom
493       auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
494       LinksFrom->remapTo(LinksInto->Number);
495       LinksFrom = NewLinksFrom;
496       LinksInto = &linksAt(LinksInto->getBelow());
497     }
498 
499     if (LinksFrom->hasBelow()) {
500       LinksInto->setBelow(LinksFrom->getBelow());
501       auto &NewBelow = linksAt(LinksInto->getBelow());
502       NewBelow.setAbove(LinksInto->Number);
503     }
504 
505     LinksInto->setAttrs(LinksFrom->getAttrs());
506     LinksFrom->remapTo(LinksInto->Number);
507   }
508 
509   /// Checks to see if lowerIndex is at a level lower than upperIndex. If so, it
510   /// will merge lowerIndex with upperIndex (and all of the sets between) and
511   /// return true. Otherwise, it will return false.
tryMergeUpwards(StratifiedIndex LowerIndex,StratifiedIndex UpperIndex)512   bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
513     assert(inbounds(LowerIndex) && inbounds(UpperIndex));
514     auto *Lower = &linksAt(LowerIndex);
515     auto *Upper = &linksAt(UpperIndex);
516     if (Lower == Upper)
517       return true;
518 
519     SmallVector<BuilderLink *, 8> Found;
520     auto *Current = Lower;
521     auto Attrs = Current->getAttrs();
522     while (Current->hasAbove() && Current != Upper) {
523       Found.push_back(Current);
524       Attrs |= Current->getAttrs();
525       Current = &linksAt(Current->getAbove());
526     }
527 
528     if (Current != Upper)
529       return false;
530 
531     Upper->setAttrs(Attrs);
532 
533     if (Lower->hasBelow()) {
534       auto NewBelowIndex = Lower->getBelow();
535       Upper->setBelow(NewBelowIndex);
536       auto &NewBelow = linksAt(NewBelowIndex);
537       NewBelow.setAbove(UpperIndex);
538     } else {
539       Upper->clearBelow();
540     }
541 
542     for (const auto &Ptr : Found)
543       Ptr->remapTo(Upper->Number);
544 
545     return true;
546   }
547 
get(const T & Val)548   Optional<const StratifiedInfo *> get(const T &Val) const {
549     auto Result = Values.find(Val);
550     if (Result == Values.end())
551       return None;
552     return &Result->second;
553   }
554 
get(const T & Val)555   Optional<StratifiedInfo *> get(const T &Val) {
556     auto Result = Values.find(Val);
557     if (Result == Values.end())
558       return None;
559     return &Result->second;
560   }
561 
indexOf(const T & Val)562   Optional<StratifiedIndex> indexOf(const T &Val) {
563     auto MaybeVal = get(Val);
564     if (!MaybeVal.hasValue())
565       return None;
566     auto *Info = *MaybeVal;
567     auto &Link = linksAt(Info->Index);
568     return Link.Number;
569   }
570 
addLinkBelow(StratifiedIndex Set)571   StratifiedIndex addLinkBelow(StratifiedIndex Set) {
572     auto At = addLinks();
573     Links[Set].setBelow(At);
574     Links[At].setAbove(Set);
575     return At;
576   }
577 
addLinkAbove(StratifiedIndex Set)578   StratifiedIndex addLinkAbove(StratifiedIndex Set) {
579     auto At = addLinks();
580     Links[At].setBelow(Set);
581     Links[Set].setAbove(At);
582     return At;
583   }
584 
getNewUnlinkedIndex()585   StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
586 
addLinks()587   StratifiedIndex addLinks() {
588     auto Link = Links.size();
589     Links.push_back(BuilderLink(Link));
590     return Link;
591   }
592 
inbounds(StratifiedIndex N)593   bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
594 };
595 }
596 }
597 #endif // LLVM_ADT_STRATIFIEDSETS_H
598