1 //===- LoopAnalysis.cpp - Misc loop analysis routines //-------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements miscellaneous loop analysis routines.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "mlir/Analysis/LoopAnalysis.h"
14 
15 #include "mlir/Analysis/AffineAnalysis.h"
16 #include "mlir/Analysis/AffineStructures.h"
17 #include "mlir/Analysis/NestedMatcher.h"
18 #include "mlir/Dialect/Affine/IR/AffineOps.h"
19 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
20 #include "mlir/Support/MathExtras.h"
21 
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/SmallString.h"
24 #include <type_traits>
25 
26 using namespace mlir;
27 
28 /// Returns the trip count of the loop as an affine expression if the latter is
29 /// expressible as an affine expression, and nullptr otherwise. The trip count
30 /// expression is simplified before returning. This method only utilizes map
31 /// composition to construct lower and upper bounds before computing the trip
32 /// count expressions.
buildTripCountMapAndOperands(AffineForOp forOp,AffineMap * tripCountMap,SmallVectorImpl<Value> * tripCountOperands)33 void mlir::buildTripCountMapAndOperands(
34     AffineForOp forOp, AffineMap *tripCountMap,
35     SmallVectorImpl<Value> *tripCountOperands) {
36   int64_t loopSpan;
37 
38   int64_t step = forOp.getStep();
39   OpBuilder b(forOp.getOperation());
40 
41   if (forOp.hasConstantBounds()) {
42     int64_t lb = forOp.getConstantLowerBound();
43     int64_t ub = forOp.getConstantUpperBound();
44     loopSpan = ub - lb;
45     if (loopSpan < 0)
46       loopSpan = 0;
47     *tripCountMap = b.getConstantAffineMap(ceilDiv(loopSpan, step));
48     tripCountOperands->clear();
49     return;
50   }
51   auto lbMap = forOp.getLowerBoundMap();
52   auto ubMap = forOp.getUpperBoundMap();
53   if (lbMap.getNumResults() != 1) {
54     *tripCountMap = AffineMap();
55     return;
56   }
57 
58   // Difference of each upper bound expression from the single lower bound
59   // expression (divided by the step) provides the expressions for the trip
60   // count map.
61   AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());
62 
63   SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
64                                          lbMap.getResult(0));
65   auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
66                                    lbSplatExpr, b.getContext());
67   AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());
68 
69   AffineValueMap tripCountValueMap;
70   AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
71   for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
72     tripCountValueMap.setResult(i,
73                                 tripCountValueMap.getResult(i).ceilDiv(step));
74 
75   *tripCountMap = tripCountValueMap.getAffineMap();
76   tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
77                             tripCountValueMap.getOperands().end());
78 }
79 
80 /// Returns the trip count of the loop if it's a constant, None otherwise. This
81 /// method uses affine expression analysis (in turn using getTripCount) and is
82 /// able to determine constant trip count in non-trivial cases.
83 // FIXME(mlir-team): this is really relying on buildTripCountMapAndOperands;
84 // being an analysis utility, it shouldn't. Replace with a version that just
85 // works with analysis structures (FlatAffineConstraints) and thus doesn't
86 // update the IR.
getConstantTripCount(AffineForOp forOp)87 Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
88   SmallVector<Value, 4> operands;
89   AffineMap map;
90   buildTripCountMapAndOperands(forOp, &map, &operands);
91 
92   if (!map)
93     return None;
94 
95   // Take the min if all trip counts are constant.
96   Optional<uint64_t> tripCount;
97   for (auto resultExpr : map.getResults()) {
98     if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
99       if (tripCount.hasValue())
100         tripCount = std::min(tripCount.getValue(),
101                              static_cast<uint64_t>(constExpr.getValue()));
102       else
103         tripCount = constExpr.getValue();
104     } else
105       return None;
106   }
107   return tripCount;
108 }
109 
110 /// Returns the greatest known integral divisor of the trip count. Affine
111 /// expression analysis is used (indirectly through getTripCount), and
112 /// this method is thus able to determine non-trivial divisors.
getLargestDivisorOfTripCount(AffineForOp forOp)113 uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
114   SmallVector<Value, 4> operands;
115   AffineMap map;
116   buildTripCountMapAndOperands(forOp, &map, &operands);
117 
118   if (!map)
119     return 1;
120 
121   // The largest divisor of the trip count is the GCD of the individual largest
122   // divisors.
123   assert(map.getNumResults() >= 1 && "expected one or more results");
124   Optional<uint64_t> gcd;
125   for (auto resultExpr : map.getResults()) {
126     uint64_t thisGcd;
127     if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
128       uint64_t tripCount = constExpr.getValue();
129       // 0 iteration loops (greatest divisor is 2^64 - 1).
130       if (tripCount == 0)
131         thisGcd = std::numeric_limits<uint64_t>::max();
132       else
133         // The greatest divisor is the trip count.
134         thisGcd = tripCount;
135     } else {
136       // Trip count is not a known constant; return its largest known divisor.
137       thisGcd = resultExpr.getLargestKnownDivisor();
138     }
139     if (gcd.hasValue())
140       gcd = llvm::GreatestCommonDivisor64(gcd.getValue(), thisGcd);
141     else
142       gcd = thisGcd;
143   }
144   assert(gcd.hasValue() && "value expected per above logic");
145   return gcd.getValue();
146 }
147 
148 /// Given an induction variable `iv` of type AffineForOp and an access `index`
149 /// of type index, returns `true` if `index` is independent of `iv` and
150 /// false otherwise. The determination supports composition with at most one
151 /// AffineApplyOp. The 'at most one AffineApplyOp' comes from the fact that
152 /// the composition of AffineApplyOp needs to be canonicalized by construction
153 /// to avoid writing code that composes arbitrary numbers of AffineApplyOps
154 /// everywhere. To achieve this, at the very least, the compose-affine-apply
155 /// pass must have been run.
156 ///
157 /// Prerequisites:
158 ///   1. `iv` and `index` of the proper type;
159 ///   2. at most one reachable AffineApplyOp from index;
160 ///
161 /// Returns false in cases with more than one AffineApplyOp, this is
162 /// conservative.
isAccessIndexInvariant(Value iv,Value index)163 static bool isAccessIndexInvariant(Value iv, Value index) {
164   assert(isForInductionVar(iv) && "iv must be a AffineForOp");
165   assert(index.getType().isa<IndexType>() && "index must be of IndexType");
166   SmallVector<Operation *, 4> affineApplyOps;
167   getReachableAffineApplyOps({index}, affineApplyOps);
168 
169   if (affineApplyOps.empty()) {
170     // Pointer equality test because of Value pointer semantics.
171     return index != iv;
172   }
173 
174   if (affineApplyOps.size() > 1) {
175     affineApplyOps[0]->emitRemark(
176         "CompositionAffineMapsPass must have been run: there should be at most "
177         "one AffineApplyOp, returning false conservatively.");
178     return false;
179   }
180 
181   auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
182   // We need yet another level of indirection because the `dim` index of the
183   // access may not correspond to the `dim` index of composeOp.
184   return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
185 }
186 
getInvariantAccesses(Value iv,ArrayRef<Value> indices)187 DenseSet<Value> mlir::getInvariantAccesses(Value iv, ArrayRef<Value> indices) {
188   DenseSet<Value> res;
189   for (unsigned idx = 0, n = indices.size(); idx < n; ++idx) {
190     auto val = indices[idx];
191     if (isAccessIndexInvariant(iv, val)) {
192       res.insert(val);
193     }
194   }
195   return res;
196 }
197 
198 /// Given:
199 ///   1. an induction variable `iv` of type AffineForOp;
200 ///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
201 /// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
202 /// is defined as either invariant or varying only along a unique MemRef dim.
203 /// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
204 /// convey the memRef access is invariant along `iv`).
205 ///
206 /// Prerequisites:
207 ///   1. `memRefDim` ~= nullptr;
208 ///   2. `iv` of the proper type;
209 ///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
210 ///      identity layout map.
211 ///
212 /// Currently only supports no layoutMap or identity layoutMap in the MemRef.
213 /// Returns false if the MemRef has a non-identity layoutMap or more than 1
214 /// layoutMap. This is conservative.
215 ///
216 // TODO: check strides.
217 template <typename LoadOrStoreOp>
isContiguousAccess(Value iv,LoadOrStoreOp memoryOp,int * memRefDim)218 static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
219                                int *memRefDim) {
220   static_assert(
221       llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
222       "Must be called on either LoadOp or StoreOp");
223   assert(memRefDim && "memRefDim == nullptr");
224   auto memRefType = memoryOp.getMemRefType();
225 
226   auto layoutMap = memRefType.getAffineMaps();
227   // TODO: remove dependence on Builder once we support non-identity layout map.
228   Builder b(memoryOp.getContext());
229   if (layoutMap.size() >= 2 ||
230       (layoutMap.size() == 1 &&
231        !(layoutMap[0] ==
232          b.getMultiDimIdentityMap(layoutMap[0].getNumDims())))) {
233     return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
234   }
235 
236   int uniqueVaryingIndexAlongIv = -1;
237   auto accessMap = memoryOp.getAffineMap();
238   SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
239   unsigned numDims = accessMap.getNumDims();
240   for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
241     // Gather map operands used result expr 'i' in 'exprOperands'.
242     SmallVector<Value, 4> exprOperands;
243     auto resultExpr = accessMap.getResult(i);
244     resultExpr.walk([&](AffineExpr expr) {
245       if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
246         exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
247       else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
248         exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
249     });
250     // Check access invariance of each operand in 'exprOperands'.
251     for (auto exprOperand : exprOperands) {
252       if (!isAccessIndexInvariant(iv, exprOperand)) {
253         if (uniqueVaryingIndexAlongIv != -1) {
254           // 2+ varying indices -> do not vectorize along iv.
255           return false;
256         }
257         uniqueVaryingIndexAlongIv = i;
258       }
259     }
260   }
261 
262   if (uniqueVaryingIndexAlongIv == -1)
263     *memRefDim = -1;
264   else
265     *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
266   return true;
267 }
268 
269 template <typename LoadOrStoreOp>
isVectorElement(LoadOrStoreOp memoryOp)270 static bool isVectorElement(LoadOrStoreOp memoryOp) {
271   auto memRefType = memoryOp.getMemRefType();
272   return memRefType.getElementType().template isa<VectorType>();
273 }
274 
275 using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;
276 
277 static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,VectorizableOpFun isVectorizableOp,NestedPattern & vectorTransferMatcher)278 isVectorizableLoopBodyWithOpCond(AffineForOp loop,
279                                  VectorizableOpFun isVectorizableOp,
280                                  NestedPattern &vectorTransferMatcher) {
281   auto *forOp = loop.getOperation();
282 
283   // No vectorization across conditionals for now.
284   auto conditionals = matcher::If();
285   SmallVector<NestedMatch, 8> conditionalsMatched;
286   conditionals.match(forOp, &conditionalsMatched);
287   if (!conditionalsMatched.empty()) {
288     return false;
289   }
290 
291   // No vectorization across unknown regions.
292   auto regions = matcher::Op([](Operation &op) -> bool {
293     return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
294   });
295   SmallVector<NestedMatch, 8> regionsMatched;
296   regions.match(forOp, &regionsMatched);
297   if (!regionsMatched.empty()) {
298     return false;
299   }
300 
301   SmallVector<NestedMatch, 8> vectorTransfersMatched;
302   vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
303   if (!vectorTransfersMatched.empty()) {
304     return false;
305   }
306 
307   auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
308   SmallVector<NestedMatch, 8> loadAndStoresMatched;
309   loadAndStores.match(forOp, &loadAndStoresMatched);
310   for (auto ls : loadAndStoresMatched) {
311     auto *op = ls.getMatchedOperation();
312     auto load = dyn_cast<AffineLoadOp>(op);
313     auto store = dyn_cast<AffineStoreOp>(op);
314     // Only scalar types are considered vectorizable, all load/store must be
315     // vectorizable for a loop to qualify as vectorizable.
316     // TODO: ponder whether we want to be more general here.
317     bool vector = load ? isVectorElement(load) : isVectorElement(store);
318     if (vector) {
319       return false;
320     }
321     if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
322       return false;
323     }
324   }
325   return true;
326 }
327 
isVectorizableLoopBody(AffineForOp loop,int * memRefDim,NestedPattern & vectorTransferMatcher)328 bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
329                                   NestedPattern &vectorTransferMatcher) {
330   VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
331     auto load = dyn_cast<AffineLoadOp>(op);
332     auto store = dyn_cast<AffineStoreOp>(op);
333     return load ? isContiguousAccess(loop.getInductionVar(), load, memRefDim)
334                 : isContiguousAccess(loop.getInductionVar(), store, memRefDim);
335   });
336   return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
337 }
338 
isVectorizableLoopBody(AffineForOp loop,NestedPattern & vectorTransferMatcher)339 bool mlir::isVectorizableLoopBody(AffineForOp loop,
340                                   NestedPattern &vectorTransferMatcher) {
341   return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
342 }
343 
344 /// Checks whether SSA dominance would be violated if a for op's body
345 /// operations are shifted by the specified shifts. This method checks if a
346 /// 'def' and all its uses have the same shift factor.
347 // TODO: extend this to check for memory-based dependence violation when we have
348 // the support.
isOpwiseShiftValid(AffineForOp forOp,ArrayRef<uint64_t> shifts)349 bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
350   auto *forBody = forOp.getBody();
351   assert(shifts.size() == forBody->getOperations().size());
352 
353   // Work backwards over the body of the block so that the shift of a use's
354   // ancestor operation in the block gets recorded before it's looked up.
355   DenseMap<Operation *, uint64_t> forBodyShift;
356   for (auto it : llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
357     auto &op = it.value();
358 
359     // Get the index of the current operation, note that we are iterating in
360     // reverse so we need to fix it up.
361     size_t index = shifts.size() - it.index() - 1;
362 
363     // Remember the shift of this operation.
364     uint64_t shift = shifts[index];
365     forBodyShift.try_emplace(&op, shift);
366 
367     // Validate the results of this operation if it were to be shifted.
368     for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
369       Value result = op.getResult(i);
370       for (auto *user : result.getUsers()) {
371         // If an ancestor operation doesn't lie in the block of forOp,
372         // there is no shift to check.
373         if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
374           assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
375           if (shift != forBodyShift[ancOp])
376             return false;
377         }
378       }
379     }
380   }
381   return true;
382 }
383