// RUN: mlir-opt -allow-unregistered-dialect %s -memref-dataflow-opt | FileCheck %s

// CHECK-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0, d1) -> (d1 + 1)>
// CHECK-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0, d1) -> (d0)>
// CHECK-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: [[$MAP3:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 - 1)>
// CHECK-DAG: [[$MAP4:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>

// CHECK-LABEL: func @simple_store_load() {
func @simple_store_load() {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    // The store is the unique reaching definition for the load in the same
    // iteration, so %cf7 is forwarded to %v0; the memref then has no users
    // left and is erased along with the alloc (see the checks below).
    affine.store %cf7, %m[%i0] : memref<10xf32>
    %v0 = affine.load %m[%i0] : memref<10xf32>
    %v1 = addf %v0, %v0 : f32
  }
  return
// CHECK:       %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT:  affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:    %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:  }
// CHECK-NEXT:  return
}

// CHECK-LABEL: func @multi_store_load() {
func @multi_store_load() {
  %c0 = constant 0 : index
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
  %cf9 = constant 9.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    %v0 = affine.load %m[%i0] : memref<10xf32>
    %v1 = addf %v0, %v0 : f32
    // %v2 and %v3 read the value of the last store (%cf9); every load is
    // forwarded, the dead memref is erased, and only addf/mulf survive in
    // the loop body (see the checks below).
    affine.store %cf8, %m[%i0] : memref<10xf32>
    affine.store %cf9, %m[%i0] : memref<10xf32>
    %v2 = affine.load %m[%i0] : memref<10xf32>
    %v3 = affine.load %m[%i0] : memref<10xf32>
    %v4 = mulf %v2, %v3 : f32
  }
  return
// CHECK:       %{{.*}} = constant 0 : index
// CHECK-NEXT:  %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT:  %{{.*}} = constant 8.000000e+00 : f32
// CHECK-NEXT:  %{{.*}} = constant 9.000000e+00 : f32
// CHECK-NEXT:  affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:    %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:    %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:  }
// CHECK-NEXT:  return

}

// The store-load forwarding can see through affine apply's since it relies on
// dependence information.
// CHECK-LABEL: func @store_load_affine_apply
func @store_load_affine_apply() -> memref<10x10xf32> {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10x10xf32>
  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      // Composing the applies: %t0 = %i1 + 1, %t1 = %i0, so
      // %idx0 = %t1 = %i0 and %idx1 = %t0 - 1 = %i1 — the store writes
      // exactly the element the load below reads.
      %t0 = affine.apply affine_map<(d0, d1) -> (d1 + 1)>(%i0, %i1)
      %t1 = affine.apply affine_map<(d0, d1) -> (d0)>(%i0, %i1)
      %idx0 = affine.apply affine_map<(d0, d1) -> (d1)> (%t0, %t1)
      %idx1 = affine.apply affine_map<(d0, d1) -> (d0 - 1)> (%t0, %t1)
      affine.store %cf7, %m[%idx0, %idx1] : memref<10x10xf32>
      // CHECK-NOT: affine.load %{{[0-9]+}}
      %v0 = affine.load %m[%i0, %i1] : memref<10x10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  // The memref and its stores won't be erased due to this memref return.
  return %m : memref<10x10xf32>
// CHECK:       %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT:  %{{.*}} = alloc() : memref<10x10xf32>
// CHECK-NEXT:  affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:    affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:      %{{.*}} = affine.apply [[$MAP0]](%{{.*}}, %{{.*}})
// CHECK-NEXT:      %{{.*}} = affine.apply [[$MAP1]](%{{.*}}, %{{.*}})
// CHECK-NEXT:      %{{.*}} = affine.apply [[$MAP2]](%{{.*}}, %{{.*}})
// CHECK-NEXT:      %{{.*}} = affine.apply [[$MAP3]](%{{.*}}, %{{.*}})
// CHECK-NEXT:      affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
// CHECK-NEXT:      %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:    }
// CHECK-NEXT:  }
// CHECK-NEXT:  return %{{.*}} : memref<10x10xf32>
}

// CHECK-LABEL: func @store_load_nested
func @store_load_nested(%N : index) {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    // This store dominates the load in the inner loop and no other write to
    // %m[%i0] intervenes, so forwarding happens and the memref is erased.
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  return
// CHECK:       %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT:  affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:    affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK-NEXT:      %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:    }
// CHECK-NEXT:  }
// CHECK-NEXT:  return
}

// No forwarding happens here since either of the two stores could be the last
// writer; store/load forwarding will however be possible here once loop live
// out SSA scalars are available.
// CHECK-LABEL: func @multi_store_load_nested_no_fwd
func @multi_store_load_nested_no_fwd(%N : index) {
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      // Depending on %N, this store may or may not overwrite %m[%i0], so
      // either store could be the last writer seen by the load below.
      affine.store %cf8, %m[%i1] : memref<10xf32>
    }
    affine.for %i2 = 0 to %N {
      // CHECK: %{{[0-9]+}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  return
}

// No forwarding happens here since both stores have a value going into
// the load.
// CHECK-LABEL: func @store_load_store_nested_no_fwd
func @store_load_store_nested_no_fwd(%N : index) {
  %cf7 = constant 7.0 : f32
  %cf9 = constant 9.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      // From the second %i1 iteration onwards, the %cf9 store of the
      // previous iteration also reaches this load, so no single store can
      // be forwarded.
      // CHECK: %{{[0-9]+}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
      affine.store %cf9, %m[%i0] : memref<10xf32>
    }
  }
  return
}

// Forwarding happens here since the last store postdominates all other stores
// and other forwarding criteria are satisfied.
// CHECK-LABEL: func @multi_store_load_nested_fwd
func @multi_store_load_nested_fwd(%N : index) {
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
  %cf9 = constant 9.0 : f32
  %cf10 = constant 10.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      affine.store %cf8, %m[%i1] : memref<10xf32>
    }
    affine.for %i2 = 0 to %N {
      affine.store %cf9, %m[%i2] : memref<10xf32>
    }
    // This is the last write to %m[%i0] before the loads in the %i3 loop,
    // so %cf10 is the value that gets forwarded.
    affine.store %cf10, %m[%i0] : memref<10xf32>
    affine.for %i3 = 0 to %N {
      // CHECK-NOT: %{{[0-9]+}} = affine.load
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  return
}

// There is no unique load location for the store to forward to.
// CHECK-LABEL: func @store_load_no_fwd
func @store_load_no_fwd() {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to 10 {
      affine.for %i2 = 0 to 10 {
        // The store writes only %m[%i0] per outer iteration while this load
        // sweeps %m[0..10) via %i2, so no single stored value covers it.
        // CHECK: affine.load %{{[0-9]+}}
        %v0 = affine.load %m[%i2] : memref<10xf32>
        %v1 = addf %v0, %v0 : f32
      }
    }
  }
  return
}

// Forwarding happens here as there is a one-to-one store-load correspondence:
// the single store to %m[%c0] dominates every load of %m[%c0].
// CHECK-LABEL: func @store_load_fwd
func @store_load_fwd() {
  %cf7 = constant 7.0 : f32
  %c0 = constant 0 : index
  %m = alloc() : memref<10xf32>
  affine.store %cf7, %m[%c0] : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      affine.for %i2 = 0 to 10 {
        // Fixed pattern: the '+' previously sat outside the regex braces
        // ('%{{[0-9]}}+'), a pattern that can never match, which made this
        // negative check vacuously pass.
        // CHECK-NOT: affine.load %{{[0-9]+}}
        %v0 = affine.load %m[%c0] : memref<10xf32>
        %v1 = addf %v0, %v0 : f32
      }
    }
  }
  return
}

// Although there is a dependence from the second store to the load, it is
// satisfied by the outer surrounding loop, and does not prevent the first
// store to be forwarded to the load.
// CHECK-LABEL was missing here, unlike every other test in this file; added
// so the checks below are anchored to this function's output. The unused
// %c0 constant was dropped.
// CHECK-LABEL: func @store_load_store_nested_fwd
func @store_load_store_nested_fwd(%N : index) -> f32 {
  %cf7 = constant 7.0 : f32
  %cf9 = constant 9.0 : f32
  %c1 = constant 1 : index
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
      // Writes %m[%i0 + 1], a different element from the one loaded above
      // within the same %i0 iteration.
      %idx = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0)
      affine.store %cf9, %m[%idx] : memref<10xf32>
    }
  }
  // Due to this load, the memref isn't optimized away.
  %v3 = affine.load %m[%c1] : memref<10xf32>
  return %v3 : f32
// CHECK:       %{{.*}} = alloc() : memref<10xf32>
// CHECK-NEXT:  affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:    affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT:    affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK-NEXT:      %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:      %{{.*}} = affine.apply [[$MAP4]](%{{.*}})
// CHECK-NEXT:      affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT:    }
// CHECK-NEXT:  }
// CHECK-NEXT:  %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT:  return %{{.*}} : f32
}

// CHECK-LABEL: func @should_not_fwd
func @should_not_fwd(%A: memref<100xf32>, %M : index, %N : index) -> f32 {
  %cf = constant 0.0 : f32
  affine.store %cf, %A[%M] : memref<100xf32>
  // %M and %N are not known to be equal, so the store may not cover the
  // load and no forwarding can happen.
  // CHECK: affine.load %{{.*}}[%{{.*}}]
  %v = affine.load %A[%N] : memref<100xf32>
  return %v : f32
}

// Can store forward to A[%j, %i], but no forwarding to load on %A[%i, %j]
// CHECK-LABEL: func @refs_not_known_to_be_equal
func @refs_not_known_to_be_equal(%A : memref<100 x 100 x f32>, %M : index) {
  // NOTE(review): %N has no uses below — presumably intentional test input;
  // confirm.
  %N = affine.apply affine_map<(d0) -> (d0 + 1)> (%M)
  %cf1 = constant 1.0 : f32
  affine.for %i = 0 to 100 {
  // CHECK: affine.for %[[I:.*]] =
    affine.for %j = 0 to 100 {
    // CHECK: affine.for %[[J:.*]] =
      // %u and %v survive: (%i, %j) vs (%j, %i) are not known equal, so the
      // store cannot be proven to cover these loads.
      // CHECK: affine.load %{{.*}}[%[[I]], %[[J]]]
      %u = affine.load %A[%i, %j] : memref<100x100xf32>
      // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[J]], %[[I]]]
      affine.store %cf1, %A[%j, %i] : memref<100x100xf32>
      // CHECK-NEXT: affine.load %{{.*}}[%[[I]], %[[J]]]
      %v = affine.load %A[%i, %j] : memref<100x100xf32>
      // This load should disappear.
      %w = affine.load %A[%j, %i] : memref<100x100xf32>
      // CHECK-NEXT: "foo"
      "foo" (%u, %v, %w) : (f32, f32, f32) -> ()
    }
  }
  return
}

// The test checks for value forwarding from vector stores to vector loads.
// The value loaded from %in can directly be stored to %out by eliminating
// store and load from %tmp.
func @vector_forwarding(%in : memref<512xf32>, %out : memref<512xf32>) {
  %tmp = alloc() : memref<512xf32>
  affine.for %i = 0 to 16 {
    // The %tmp store/load round trip is forwarded: %ld1 becomes %ld0, and
    // the loop body reduces to one vector load feeding one vector store
    // (verified below).
    %ld0 = affine.vector_load %in[32*%i] : memref<512xf32>, vector<32xf32>
    affine.vector_store %ld0, %tmp[32*%i] : memref<512xf32>, vector<32xf32>
    %ld1 = affine.vector_load %tmp[32*%i] : memref<512xf32>, vector<32xf32>
    affine.vector_store %ld1, %out[32*%i] : memref<512xf32>, vector<32xf32>
  }
  return
}

// CHECK-LABEL: func @vector_forwarding
// CHECK:      affine.for %{{.*}} = 0 to 16 {
// CHECK-NEXT:   %[[LDVAL:.*]] = affine.vector_load
// CHECK-NEXT:   affine.vector_store %[[LDVAL]],{{.*}}
// CHECK-NEXT: }
