1 //===- AffineLoopNormalize.cpp - AffineLoopNormalize Pass -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a normalizer for affine loop-like ops.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PassDetail.h"
14 #include "mlir/Dialect/Affine/IR/AffineOps.h"
15 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
16 #include "mlir/Dialect/Affine/Passes.h"
17 #include "mlir/Dialect/Affine/Utils.h"
18 #include "mlir/IR/PatternMatch.h"
19 #include "mlir/Transforms/LoopUtils.h"
20 
21 using namespace mlir;
22 
normalizeAffineParallel(AffineParallelOp op)23 void mlir::normalizeAffineParallel(AffineParallelOp op) {
24   AffineMap lbMap = op.lowerBoundsMap();
25   SmallVector<int64_t, 8> steps = op.getSteps();
26   // No need to do any work if the parallel op is already normalized.
27   bool isAlreadyNormalized =
28       llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
29         int64_t step = std::get<0>(tuple);
30         auto lbExpr =
31             std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
32         return lbExpr && lbExpr.getValue() == 0 && step == 1;
33       });
34   if (isAlreadyNormalized)
35     return;
36 
37   AffineValueMap ranges = op.getRangesValueMap();
38   auto builder = OpBuilder::atBlockBegin(op.getBody());
39   auto zeroExpr = builder.getAffineConstantExpr(0);
40   SmallVector<AffineExpr, 8> lbExprs;
41   SmallVector<AffineExpr, 8> ubExprs;
42   for (unsigned i = 0, e = steps.size(); i < e; ++i) {
43     int64_t step = steps[i];
44 
45     // Adjust the lower bound to be 0.
46     lbExprs.push_back(zeroExpr);
47 
48     // Adjust the upper bound expression: 'range / step'.
49     AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
50     ubExprs.push_back(ubExpr);
51 
52     // Adjust the corresponding IV: 'lb + i * step'.
53     BlockArgument iv = op.getBody()->getArgument(i);
54     AffineExpr lbExpr = lbMap.getResult(i);
55     unsigned nDims = lbMap.getNumDims();
56     auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
57     auto map = AffineMap::get(/*dimCount=*/nDims + 1,
58                               /*symbolCount=*/lbMap.getNumSymbols(), expr);
59 
60     // Use an 'affine.apply' op that will be simplified later in subsequent
61     // canonicalizations.
62     OperandRange lbOperands = op.getLowerBoundsOperands();
63     OperandRange dimOperands = lbOperands.take_front(nDims);
64     OperandRange symbolOperands = lbOperands.drop_front(nDims);
65     SmallVector<Value, 8> applyOperands{dimOperands};
66     applyOperands.push_back(iv);
67     applyOperands.append(symbolOperands.begin(), symbolOperands.end());
68     auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
69     iv.replaceAllUsesExcept(apply, SmallPtrSet<Operation *, 1>{apply});
70   }
71 
72   SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
73   op.setSteps(newSteps);
74   auto newLowerMap = AffineMap::get(
75       /*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
76   op.setLowerBounds({}, newLowerMap);
77   auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
78                                     ubExprs, op.getContext());
79   op.setUpperBounds(ranges.getOperands(), newUpperMap);
80 }
81 
82 /// Normalizes affine.for ops. If the affine.for op has only a single iteration
83 /// only then it is simply promoted, else it is normalized in the traditional
84 /// way, by converting the lower bound to zero and loop step to one. The upper
85 /// bound is set to the trip count of the loop. For now, original loops must
86 /// have lower bound with a single result only. There is no such restriction on
87 /// upper bounds.
normalizeAffineFor(AffineForOp op)88 static void normalizeAffineFor(AffineForOp op) {
89   if (succeeded(promoteIfSingleIteration(op)))
90     return;
91 
92   // Check if the forop is already normalized.
93   if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
94       (op.getStep() == 1))
95     return;
96 
97   // Check if the lower bound has a single result only. Loops with a max lower
98   // bound can't be normalized without additional support like
99   // affine.execute_region's. If the lower bound does not have a single result
100   // then skip this op.
101   if (op.getLowerBoundMap().getNumResults() != 1)
102     return;
103 
104   Location loc = op.getLoc();
105   OpBuilder opBuilder(op);
106   int64_t origLoopStep = op.getStep();
107 
108   // Calculate upperBound for normalized loop.
109   SmallVector<Value, 4> ubOperands;
110   AffineBound lb = op.getLowerBound();
111   AffineBound ub = op.getUpperBound();
112   ubOperands.reserve(ub.getNumOperands() + lb.getNumOperands());
113   AffineMap origLbMap = lb.getMap();
114   AffineMap origUbMap = ub.getMap();
115 
116   // Add dimension operands from upper/lower bound.
117   for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
118     ubOperands.push_back(ub.getOperand(j));
119   for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j)
120     ubOperands.push_back(lb.getOperand(j));
121 
122   // Add symbol operands from upper/lower bound.
123   for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
124     ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
125   for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
126     ubOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
127 
128   // Add original result expressions from lower/upper bound map.
129   SmallVector<AffineExpr, 1> origLbExprs(origLbMap.getResults().begin(),
130                                          origLbMap.getResults().end());
131   SmallVector<AffineExpr, 2> origUbExprs(origUbMap.getResults().begin(),
132                                          origUbMap.getResults().end());
133   SmallVector<AffineExpr, 4> newUbExprs;
134 
135   // The original upperBound can have more than one result. For the new
136   // upperBound of this loop, take difference of all possible combinations of
137   // the ub results and lb result and ceildiv with the loop step. For e.g.,
138   //
139   //  affine.for %i1 = 0 to min affine_map<(d0)[] -> (d0 + 32, 1024)>(%i0)
140   //  will have an upperBound map as,
141   //  affine_map<(d0)[] -> (((d0 + 32) - 0) ceildiv 1, (1024 - 0) ceildiv
142   //  1)>(%i0)
143   //
144   // Insert all combinations of upper/lower bound results.
145   for (unsigned i = 0, e = origUbExprs.size(); i < e; ++i) {
146     newUbExprs.push_back(
147         (origUbExprs[i] - origLbExprs[0]).ceilDiv(origLoopStep));
148   }
149 
150   // Construct newUbMap.
151   AffineMap newUbMap =
152       AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(),
153                      origLbMap.getNumSymbols() + origUbMap.getNumSymbols(),
154                      newUbExprs, opBuilder.getContext());
155 
156   // Normalize the loop.
157   op.setUpperBound(ubOperands, newUbMap);
158   op.setLowerBound({}, opBuilder.getConstantAffineMap(0));
159   op.setStep(1);
160 
161   // Calculate the Value of new loopIV. Create affine.apply for the value of
162   // the loopIV in normalized loop.
163   opBuilder.setInsertionPointToStart(op.getBody());
164   SmallVector<Value, 4> lbOperands(lb.getOperands().begin(),
165                                    lb.getOperands().begin() +
166                                        lb.getMap().getNumDims());
167   // Add an extra dim operand for loopIV.
168   lbOperands.push_back(op.getInductionVar());
169   // Add symbol operands from lower bound.
170   for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
171     lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
172 
173   AffineExpr origIVExpr = opBuilder.getAffineDimExpr(lb.getMap().getNumDims());
174   AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0);
175   AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1,
176                                    origLbMap.getNumSymbols(), newIVExpr);
177   Operation *newIV = opBuilder.create<AffineApplyOp>(loc, ivMap, lbOperands);
178   op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0),
179                                             SmallPtrSet<Operation *, 1>{newIV});
180 }
181 
182 namespace {
183 
184 /// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1.
185 /// As currently implemented, this pass cannot fail, but it might skip over ops
186 /// that are already in a normalized form.
187 struct AffineLoopNormalizePass
188     : public AffineLoopNormalizeBase<AffineLoopNormalizePass> {
189 
runOnFunction__anon2ef374680211::AffineLoopNormalizePass190   void runOnFunction() override {
191     getFunction().walk([](Operation *op) {
192       if (auto affineParallel = dyn_cast<AffineParallelOp>(op))
193         normalizeAffineParallel(affineParallel);
194       else if (auto affineFor = dyn_cast<AffineForOp>(op))
195         normalizeAffineFor(affineFor);
196     });
197   }
198 };
199 
200 } // namespace
201 
createAffineLoopNormalizePass()202 std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineLoopNormalizePass() {
203   return std::make_unique<AffineLoopNormalizePass>();
204 }
205