1 //===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines prototypes for various loop fusion utility
10 // methods: these are not passes by themselves but are used either by passes,
11 // optimization sequences, or in turn by other transformation utilities.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
16 #define MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
17 
18 #include "mlir/IR/Value.h"
19 #include "mlir/Support/LLVM.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 
23 namespace mlir {
24 class AffineForOp;
25 struct ComputationSliceState;
26 class Operation;
27 
28 // TODO: Extend this module to include utility functions for querying fusion
29 // cost/storage reduction, and for performing the loop fusion transformation.
30 
31 struct FusionResult {
32   enum ResultEnum {
33     Success,
34     FailPrecondition,     // Failed precondition for fusion. (e.g. same block).
35     FailBlockDependence,  // Fusion would violate another dependence in block.
36     FailFusionDependence, // Fusion would reverse dependences between loops.
37     FailComputationSlice, // Unable to compute src loop computation slice.
38   } value;
FusionResultFusionResult39   FusionResult(ResultEnum v) : value(v) {}
40 };
41 
42 /// Describes the fusion strategy to be used in the Affine loop fusion
43 /// utilities. Currently, it is used to specialized the loop fusion utilities
44 /// with the assumptions made in the AffineLoopFusion pass for producer-consumer
45 /// and sibling fusion, while sharing a single implementation. The latter
46 /// strategies are also limited to scenarios where a single memref is involved
47 /// in the producer-consume or sibling relationship between the candidate
48 /// loops. We use 'memref' to keep track of such a memref.
49 // TODO: Remove 'memref' when we support more generic scenarios.
50 // TODO: Generalize utilities so that producer-consumer and sibling fusion
51 // strategies can be used without the assumptions made in the AffineLoopFusion
52 // pass.
53 struct FusionStrategy {
54   enum StrategyEnum {
55     // Generic loop fusion: Arbitrary loops are considered for fusion. No
56     // assumptions about a specific fusion strategy from AffineLoopFusion pass
57     // are made.
58     // TODO: Generic fusion is not fully implemented by fusion utilities yet.
59     // It should only be used for testing.
60     Generic,
61     // Producer-consumer fusion: Only loops with a producer-consumer
62     // memref dependence are considered for fusion. Currently, assumptions from
63     // the producer-consumer fusion implementation in AffineLoopFusion pass are
64     // made. See pass for specific details.
65     ProducerConsumer,
66     // Sibling fusion: Only sibling loops with no producer-consumer memref
67     // dependences are considered for fusion. Memref reuse is taken into account
68     // for profitability. Currently, assumptions from the sibling fusion
69     // implementation in AffineLoopFusion pass are made. See pass for specific
70     // details.
71     Sibling
72   } strategy;
73 
74   // Target memref for this fusion transformation.
75   Value memref;
76 
FusionStrategyFusionStrategy77   FusionStrategy(StrategyEnum strategy, Value memref)
78       : strategy(strategy), memref(memref) {}
79 };
80 
81 /// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
82 /// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
83 /// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
84 /// in the same block and dependences would not be violated). Otherwise
85 /// returns a FusionResult explaining why fusion is not feasible.
86 /// NOTE: This function is not feature complete and should only be used in
87 /// testing.
88 /// TODO: Update comments when this function is fully implemented.
89 FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
90                           unsigned dstLoopDepth,
91                           ComputationSliceState *srcSlice,
92                           FusionStrategy fusionStrategy = {
93                               FusionStrategy::Generic, Value()});
94 
95 /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point
96 /// and source slice loop bounds specified in 'srcSlice'.
97 void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
98                const ComputationSliceState &srcSlice);
99 
100 /// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
101 /// and operation count) for a loop nest up until (and including) the innermost
102 /// loop body.
103 struct LoopNestStats {
104   /// Map from AffineForOp to immediate child AffineForOps in its loop body.
105   DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap;
106   /// Map from AffineForOp to count of operations in its loop body.
107   DenseMap<Operation *, uint64_t> opCountMap;
108   /// Map from AffineForOp to its constant trip count.
109   DenseMap<Operation *, uint64_t> tripCountMap;
110 };
111 
112 /// Collect loop nest statistics (eg. loop trip count and operation count)
113 /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
114 /// returns false otherwise.
115 // TODO: Consider moving this to LoopUtils.
116 bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats);
117 
118 /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'.
119 /// Currently, the total cost is computed by counting the total operation
120 /// instance count (i.e. total number of operations in the loop body * loop
121 /// trip count) for the entire loop nest.
122 // TODO: Improve this cost model.
123 int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);
124 
125 /// Computes and returns in 'computeCost', the total compute cost of fusing the
126 /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently,
127 /// the total cost is computed by counting the total operation instance count
128 /// (i.e. total number of operations in the loop body * loop trip count) for
129 /// the entire loop nest.
130 /// Returns true on success, failure otherwise (e.g. non-constant trip counts).
131 // TODO: Improve this cost model.
132 bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
133                           AffineForOp dstForOp, LoopNestStats &dstStats,
134                           const ComputationSliceState &slice,
135                           int64_t *computeCost);
136 
137 } // end namespace mlir
138 
139 #endif // MLIR_TRANSFORMS_LOOP_FUSION_UTILS_H
140