1 //===- Utils.h - General analysis utilities ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines prototypes for various transformation utilities for
10 // memref's and non-loop IR structures. These are not passes by themselves but
11 // are used either by passes, optimization sequences, or in turn by other
12 // transformation utilities.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef MLIR_ANALYSIS_UTILS_H
17 #define MLIR_ANALYSIS_UTILS_H
18 
19 #include "mlir/Analysis/AffineStructures.h"
20 #include "mlir/IR/AffineMap.h"
21 #include "mlir/IR/Block.h"
22 #include "mlir/IR/Location.h"
23 #include "mlir/Support/LLVM.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include <memory>
26 
27 namespace mlir {
28 
29 class AffineForOp;
30 class Block;
31 class FlatAffineConstraints;
32 class Location;
33 struct MemRefAccess;
34 class Operation;
35 class Value;
36 
37 /// Populates 'loops' with IVs of the loops surrounding 'op' ordered from
38 /// the outermost 'affine.for' operation to the innermost one.
39 //  TODO: handle 'affine.if' ops.
40 void getLoopIVs(Operation &op, SmallVectorImpl<AffineForOp> *loops);
41 
42 /// Populates 'ops' with IVs of the loops surrounding `op`, along with
43 /// `affine.if` operations interleaved between these loops, ordered from the
44 /// outermost `affine.for` or `affine.if` operation to the innermost one.
45 void getEnclosingAffineForAndIfOps(Operation &op,
46                                    SmallVectorImpl<Operation *> *ops);
47 
48 /// Returns the nesting depth of this operation, i.e., the number of loops
49 /// surrounding this operation.
50 unsigned getNestingDepth(Operation *op);
51 
52 /// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
53 /// at 'forOp'.
54 void getSequentialLoops(AffineForOp forOp,
55                         llvm::SmallDenseSet<Value, 8> *sequentialLoops);
56 
57 /// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their
58 /// associated operands for a set of loops within a loop nest (typically the
59 /// set of loops surrounding a store operation). Loop bound AffineMaps which
60 /// are non-null represent slices of that loop's iteration space.
61 struct ComputationSliceState {
62   // List of sliced loop IVs (ordered from outermost to innermost).
63   // EX: 'ivs[i]' has lower bound 'lbs[i]' and upper bound 'ubs[i]'.
64   SmallVector<Value, 4> ivs;
65   // List of lower bound AffineMaps.
66   SmallVector<AffineMap, 4> lbs;
67   // List of upper bound AffineMaps.
68   SmallVector<AffineMap, 4> ubs;
69   // List of lower bound operands (lbOperands[i] are used by 'lbs[i]').
70   std::vector<SmallVector<Value, 4>> lbOperands;
71   // List of upper bound operands (ubOperands[i] are used by 'ubs[i]').
72   std::vector<SmallVector<Value, 4>> ubOperands;
73   // Slice loop nest insertion point in target loop nest.
74   Block::iterator insertPoint;
75   // Adds to 'cst' with constraints which represent the slice bounds on 'ivs'
76   // in 'this'. Specifically, the values in 'ivs' are added to 'cst' as dim
77   // identifiers and the values in 'lb/ubOperands' are added as symbols.
78   // Constraints are added for all loop IV bounds (dim or symbol), and
79   // constraints are added for slice bounds in 'lbs'/'ubs'.
80   // Returns failure if we cannot add loop bounds because of unsupported cases.
81   LogicalResult getAsConstraints(FlatAffineConstraints *cst);
82 
83   // Clears all bounds and operands in slice state.
84   void clearBounds();
85 
86   /// Return true if the computation slice is empty.
isEmptyComputationSliceState87   bool isEmpty() const { return ivs.empty(); }
88 
89   void dump() const;
90 };
91 
92 /// Computes the computation slice loop bounds for one loop nest as affine maps
93 /// of the other loop nest's IVs and symbols, using 'dependenceConstraints'
94 /// computed between 'depSourceAccess' and 'depSinkAccess'.
95 /// If 'isBackwardSlice' is true, a backwards slice is computed in which the
96 /// slice bounds of loop nest surrounding 'depSourceAccess' are computed in
97 /// terms of loop IVs and symbols of the loop nest surrounding 'depSinkAccess'
98 /// at 'loopDepth'.
99 /// If 'isBackwardSlice' is false, a forward slice is computed in which the
100 /// slice bounds of loop nest surrounding 'depSinkAccess' are computed in terms
101 /// of loop IVs and symbols of the loop nest surrounding 'depSourceAccess' at
102 /// 'loopDepth'.
103 /// The slice loop bounds and associated operands are returned in 'sliceState'.
104 //
105 //  Backward slice example:
106 //
107 //    affine.for %i0 = 0 to 10 {
108 //      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceAccess'
109 //    }
110 //    affine.for %i1 = 0 to 10 {
111 //      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkAccess'
112 //    }
113 //
114 //    // Backward computation slice of loop nest '%i0'.
115 //    affine.for %i0 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 1)(%i1) {
116 //      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceAccess'
117 //    }
118 //
119 //  Forward slice example:
120 //
121 //    affine.for %i0 = 0 to 10 {
122 //      affine.store %cst, %0[%i0] : memref<100xf32>  // 'depSourceAccess'
123 //    }
124 //    affine.for %i1 = 0 to 10 {
125 //      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkAccess'
126 //    }
127 //
128 //    // Forward computation slice of loop nest '%i1'.
129 //    affine.for %i1 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 1)(%i0) {
130 //      %v = affine.load %0[%i1] : memref<100xf32>    // 'depSinkAccess'
131 //    }
132 //
133 void getComputationSliceState(Operation *depSourceOp, Operation *depSinkOp,
134                               FlatAffineConstraints *dependenceConstraints,
135                               unsigned loopDepth, bool isBackwardSlice,
136                               ComputationSliceState *sliceState);
137 
138 /// Computes in 'sliceUnion' the union of all slice bounds computed at
139 /// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB'.
140 /// The parameter 'numCommonLoops' is the number of loops common to the
141 /// operations in 'opsA' and 'opsB'.
142 /// If 'isBackwardSlice' is true, computes slice bounds for loop nest
143 /// surrounding ops in 'opsA', as a function of IVs and symbols of loop nest
144 /// surrounding ops in 'opsB' at 'loopDepth'.
145 /// If 'isBackwardSlice' is false, computes slice bounds for loop nest
146 /// surrounding ops in 'opsB', as a function of IVs and symbols of loop nest
147 /// surrounding ops in 'opsA' at 'loopDepth'.
148 /// Returns 'success' if union was computed, 'failure' otherwise.
149 // TODO: Change this API to take 'forOpA'/'forOpB'.
150 LogicalResult computeSliceUnion(ArrayRef<Operation *> opsA,
151                                 ArrayRef<Operation *> opsB, unsigned loopDepth,
152                                 unsigned numCommonLoops, bool isBackwardSlice,
153                                 ComputationSliceState *sliceUnion);
154 
155 /// Creates a clone of the computation contained in the loop nest surrounding
156 /// 'srcOpInst', slices the iteration space of src loop based on slice bounds
157 /// in 'sliceState', and inserts the computation slice at the beginning of the
158 /// operation block of the loop at 'dstLoopDepth' in the loop nest surrounding
159 /// 'dstOpInst'. Returns the top-level loop of the computation slice on
160 /// success, returns nullptr otherwise.
161 // Loop depth is a crucial optimization choice that determines where to
162 // materialize the results of the backward slice - presenting a trade-off b/w
163 // storage and redundant computation in several cases.
164 // TODO: Support computation slices with common surrounding loops.
165 AffineForOp insertBackwardComputationSlice(Operation *srcOpInst,
166                                            Operation *dstOpInst,
167                                            unsigned dstLoopDepth,
168                                            ComputationSliceState *sliceState);
169 
170 /// A region of a memref's data space; this is typically constructed by
171 /// analyzing load/store op's on this memref and the index space of loops
172 /// surrounding such op's.
173 // For example, the memref region for a load operation at loop depth = 1:
174 //
175 //    affine.for %i = 0 to 32 {
176 //      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
177 //        affine.load %A[%ii]
178 //      }
179 //    }
180 //
181 // Region:  {memref = %A, write = false, {%i <= m0 <= %i + 7} }
182 // The last field is a 2-d FlatAffineConstraints symbolic in %i.
183 //
184 struct MemRefRegion {
MemRefRegionMemRefRegion185   explicit MemRefRegion(Location loc) : loc(loc) {}
186 
187   /// Computes the memory region accessed by this memref with the region
188   /// represented as constraints symbolic/parametric in 'loopDepth' loops
189   /// surrounding opInst. The computed region's 'cst' field has exactly as many
190   /// dimensional identifiers as the rank of the memref, and *potentially*
191   /// additional symbolic identifiers which could include any of the loop IVs
192   /// surrounding opInst up until 'loopDepth' and another additional Function
193   /// symbols involved with the access (for eg., those appear in affine.apply's,
194   /// loop bounds, etc.). If 'sliceState' is non-null, operands from
195   /// 'sliceState' are added as symbols, and the following constraints are added
196   /// to the system:
197   /// *) Inequality constraints which represent loop bounds for 'sliceState'
198   ///    operands which are loop IVS (these represent the destination loop IVs
199   ///    of the slice, and are added as symbols to MemRefRegion's constraint
200   ///    system).
201   /// *) Inequality constraints for the slice bounds in 'sliceState', which
202   ///    represent the bounds on the loop IVs in this constraint system w.r.t
203   ///    to slice operands (which correspond to symbols).
204   /// If 'addMemRefDimBounds' is true, constant upper/lower bounds
205   /// [0, memref.getDimSize(i)) are added for each MemRef dimension 'i'.
206   ///
207   ///  For example, the memref region for this operation at loopDepth = 1 will
208   ///  be:
209   ///
210   ///    affine.for %i = 0 to 32 {
211   ///      affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
212   ///        load %A[%ii]
213   ///      }
214   ///    }
215   ///
216   ///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
217   /// The last field is a 2-d FlatAffineConstraints symbolic in %i.
218   ///
219   LogicalResult compute(Operation *op, unsigned loopDepth,
220                         const ComputationSliceState *sliceState = nullptr,
221                         bool addMemRefDimBounds = true);
222 
getConstraintsMemRefRegion223   FlatAffineConstraints *getConstraints() { return &cst; }
getConstraintsMemRefRegion224   const FlatAffineConstraints *getConstraints() const { return &cst; }
isWriteMemRefRegion225   bool isWrite() const { return write; }
setWriteMemRefRegion226   void setWrite(bool flag) { write = flag; }
227 
228   /// Returns a constant upper bound on the number of elements in this region if
229   /// bounded by a known constant (always possible for static shapes), None
230   /// otherwise. Note that the symbols of the region are treated specially,
231   /// i.e., the returned bounding constant holds for *any given* value of the
232   /// symbol identifiers. The 'shape' vector is set to the corresponding
233   /// dimension-wise bounds major to minor. We use int64_t instead of uint64_t
234   /// since index types can be at most int64_t. `lbs` are set to the lower
235   /// bounds for each of the rank dimensions, and lbDivisors contains the
236   /// corresponding denominators for floorDivs.
237   Optional<int64_t> getConstantBoundingSizeAndShape(
238       SmallVectorImpl<int64_t> *shape = nullptr,
239       std::vector<SmallVector<int64_t, 4>> *lbs = nullptr,
240       SmallVectorImpl<int64_t> *lbDivisors = nullptr) const;
241 
242   /// Gets the lower and upper bound map for the dimensional identifier at
243   /// `pos`.
244   void getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
245                              AffineMap &ubMap) const;
246 
247   /// A wrapper around FlatAffineConstraints::getConstantBoundOnDimSize(). 'pos'
248   /// corresponds to the position of the memref shape's dimension (major to
249   /// minor) which matches 1:1 with the dimensional identifier positions in
250   //'cst'.
251   Optional<int64_t>
252   getConstantBoundOnDimSize(unsigned pos,
253                             SmallVectorImpl<int64_t> *lb = nullptr,
254                             int64_t *lbFloorDivisor = nullptr) const {
255     assert(pos < getRank() && "invalid position");
256     return cst.getConstantBoundOnDimSize(pos, lb);
257   }
258 
259   /// Returns the size of this MemRefRegion in bytes.
260   Optional<int64_t> getRegionSize();
261 
262   // Wrapper around FlatAffineConstraints::unionBoundingBox.
263   LogicalResult unionBoundingBox(const MemRefRegion &other);
264 
265   /// Returns the rank of the memref that this region corresponds to.
266   unsigned getRank() const;
267 
268   /// Memref that this region corresponds to.
269   Value memref;
270 
271   /// Read or write.
272   bool write;
273 
274   /// If there is more than one load/store op associated with the region, the
275   /// location information would correspond to one of those op's.
276   Location loc;
277 
278   /// Region (data space) of the memref accessed. This set will thus have at
279   /// least as many dimensional identifiers as the shape dimensionality of the
280   /// memref, and these are the leading dimensions of the set appearing in that
281   /// order (major to minor / outermost to innermost). There may be additional
282   /// identifiers since getMemRefRegion() is called with a specific loop depth,
283   /// and thus the region is symbolic in the outer surrounding loops at that
284   /// depth.
285   // TODO: Replace this to exploit HyperRectangularSet.
286   FlatAffineConstraints cst;
287 };
288 
289 /// Returns the size of memref data in bytes if it's statically shaped, None
290 /// otherwise.
291 Optional<uint64_t> getMemRefSizeInBytes(MemRefType memRefType);
292 
293 /// Checks a load or store op for an out of bound access; returns failure if the
294 /// access is out of bounds along any of the dimensions, success otherwise.
295 /// Emits a diagnostic error (with location information) if emitError is true.
296 template <typename LoadOrStoreOpPointer>
297 LogicalResult boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
298                                       bool emitError = true);
299 
300 /// Returns the number of surrounding loops common to both A and B.
301 unsigned getNumCommonSurroundingLoops(Operation &A, Operation &B);
302 
303 /// Gets the memory footprint of all data touched in the specified memory space
304 /// in bytes; if the memory space is unspecified, considers all memory spaces.
305 Optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp,
306                                           int memorySpace = -1);
307 
308 /// Returns true if `forOp' is a parallel loop.
309 bool isLoopParallel(AffineForOp forOp);
310 
311 /// Simplify the integer set by simplifying the underlying affine expressions by
312 /// flattening and some simple inference. Also, drop any duplicate constraints.
313 /// Returns the simplified integer set. This method runs in time linear in the
314 /// number of constraints.
315 IntegerSet simplifyIntegerSet(IntegerSet set);
316 
317 /// Returns the innermost common loop depth for the set of operations in 'ops'.
318 unsigned getInnermostCommonLoopDepth(
319     ArrayRef<Operation *> ops,
320     SmallVectorImpl<AffineForOp> *surroundingLoops = nullptr);
321 
322 } // end namespace mlir
323 
324 #endif // MLIR_ANALYSIS_UTILS_H
325