// RUN: mlir-opt -allow-unregistered-dialect %s -memref-dataflow-opt | FileCheck %s

// CHECK-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0, d1) -> (d1 + 1)>
// CHECK-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0, d1) -> (d0)>
// CHECK-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: [[$MAP3:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 - 1)>
// CHECK-DAG: [[$MAP4:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>

// CHECK-LABEL: func @simple_store_load() {
func @simple_store_load() {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    %v0 = affine.load %m[%i0] : memref<10xf32>
    %v1 = addf %v0, %v0 : f32
  }
  return
// CHECK:      %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:   %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
}

// CHECK-LABEL: func @multi_store_load() {
func @multi_store_load() {
  %c0 = constant 0 : index
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
  %cf9 = constant 9.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    %v0 = affine.load %m[%i0] : memref<10xf32>
    %v1 = addf %v0, %v0 : f32
    affine.store %cf8, %m[%i0] : memref<10xf32>
    affine.store %cf9, %m[%i0] : memref<10xf32>
    %v2 = affine.load %m[%i0] : memref<10xf32>
    %v3 = affine.load %m[%i0] : memref<10xf32>
    %v4 = mulf %v2, %v3 : f32
  }
  return
// CHECK:      %{{.*}} = constant 0 : index
// CHECK-NEXT: %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT: %{{.*}} = constant 8.000000e+00 : f32
// CHECK-NEXT: %{{.*}} = constant 9.000000e+00 : f32
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:   %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:   %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
}

// Store-load forwarding can see through affine.apply ops since it relies on
// dependence information.
// CHECK-LABEL: func @store_load_affine_apply
func @store_load_affine_apply() -> memref<10x10xf32> {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10x10xf32>
  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      %t0 = affine.apply affine_map<(d0, d1) -> (d1 + 1)>(%i0, %i1)
      %t1 = affine.apply affine_map<(d0, d1) -> (d0)>(%i0, %i1)
      %idx0 = affine.apply affine_map<(d0, d1) -> (d1)> (%t0, %t1)
      %idx1 = affine.apply affine_map<(d0, d1) -> (d0 - 1)> (%t0, %t1)
      affine.store %cf7, %m[%idx0, %idx1] : memref<10x10xf32>
      // CHECK-NOT: affine.load %{{[0-9]+}}
      %v0 = affine.load %m[%i0, %i1] : memref<10x10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  // The memref and its stores won't be erased due to this memref return.
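  // (The load itself is still forwarded: composing the affine.apply ops
  // yields %idx0 = %i0 and %idx1 = %i1, so the store writes exactly the
  // location the load reads.)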
  return %m : memref<10x10xf32>
// CHECK:      %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT: %{{.*}} = alloc() : memref<10x10xf32>
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:   affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:     %{{.*}} = affine.apply [[$MAP0]](%{{.*}}, %{{.*}})
// CHECK-NEXT:     %{{.*}} = affine.apply [[$MAP1]](%{{.*}}, %{{.*}})
// CHECK-NEXT:     %{{.*}} = affine.apply [[$MAP2]](%{{.*}}, %{{.*}})
// CHECK-NEXT:     %{{.*}} = affine.apply [[$MAP3]](%{{.*}}, %{{.*}})
// CHECK-NEXT:     affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
// CHECK-NEXT:     %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:   }
// CHECK-NEXT: }
// CHECK-NEXT: return %{{.*}} : memref<10x10xf32>
}

// CHECK-LABEL: func @store_load_nested
func @store_load_nested(%N : index) {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  return
// CHECK:      %{{.*}} = constant 7.000000e+00 : f32
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:   affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK-NEXT:     %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:   }
// CHECK-NEXT: }
// CHECK-NEXT: return
}

// No forwarding happens here since either of the two stores could be the last
// writer; store-load forwarding will, however, become possible here once
// loop live-out SSA scalars are available.
// CHECK-LABEL: func @multi_store_load_nested_no_fwd
func @multi_store_load_nested_no_fwd(%N : index) {
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      affine.store %cf8, %m[%i1] : memref<10xf32>
    }
    affine.for %i2 = 0 to %N {
      // CHECK: %{{[0-9]+}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  return
}

// No forwarding happens here since either store's value could be the one
// reaching the load.
// CHECK-LABEL: func @store_load_store_nested_no_fwd
func @store_load_store_nested_no_fwd(%N : index) {
  %cf7 = constant 7.0 : f32
  %cf9 = constant 9.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      // CHECK: %{{[0-9]+}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
      affine.store %cf9, %m[%i0] : memref<10xf32>
    }
  }
  return
}

// Forwarding happens here since the last store postdominates all other stores
// and the other forwarding criteria are satisfied.
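// In particular, the store of %cf10 to %m[%i0] is the unique last write
// before the load in the %i3 loop, so the load is replaced by %cf10.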
// CHECK-LABEL: func @multi_store_load_nested_fwd
func @multi_store_load_nested_fwd(%N : index) {
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
  %cf9 = constant 9.0 : f32
  %cf10 = constant 10.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      affine.store %cf8, %m[%i1] : memref<10xf32>
    }
    affine.for %i2 = 0 to %N {
      affine.store %cf9, %m[%i2] : memref<10xf32>
    }
    affine.store %cf10, %m[%i0] : memref<10xf32>
    affine.for %i3 = 0 to %N {
      // CHECK-NOT: %{{[0-9]+}} = affine.load
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
    }
  }
  return
}

// There is no unique load location for the store to forward to.
// CHECK-LABEL: func @store_load_no_fwd
func @store_load_no_fwd() {
  %cf7 = constant 7.0 : f32
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to 10 {
      affine.for %i2 = 0 to 10 {
        // CHECK: affine.load %{{[0-9]+}}
        %v0 = affine.load %m[%i2] : memref<10xf32>
        %v1 = addf %v0, %v0 : f32
      }
    }
  }
  return
}

// Forwarding happens here as there is a one-to-one store-load correspondence.
// CHECK-LABEL: func @store_load_fwd
func @store_load_fwd() {
  %cf7 = constant 7.0 : f32
  %c0 = constant 0 : index
  %m = alloc() : memref<10xf32>
  affine.store %cf7, %m[%c0] : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      affine.for %i2 = 0 to 10 {
        // CHECK-NOT: affine.load %{{[0-9]+}}
        %v0 = affine.load %m[%c0] : memref<10xf32>
        %v1 = addf %v0, %v0 : f32
      }
    }
  }
  return
}

// Although there is a dependence from the second store to the load, it is
// satisfied by the outer surrounding loop, and does not prevent the first
// store from being forwarded to the load.
func @store_load_store_nested_fwd(%N : index) -> f32 {
  %cf7 = constant 7.0 : f32
  %cf9 = constant 9.0 : f32
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %m = alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>
    affine.for %i1 = 0 to %N {
      %v0 = affine.load %m[%i0] : memref<10xf32>
      %v1 = addf %v0, %v0 : f32
      %idx = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0)
      affine.store %cf9, %m[%idx] : memref<10xf32>
    }
  }
  // Due to this load, the memref isn't optimized away.
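  // Nor is the load forwarded: both the %cf7 store (when %i0 is 1) and the
  // %cf9 store (when %i0 is 0) write to %m[1], so no single store is the
  // unique last writer of the loaded location.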
  %v3 = affine.load %m[%c1] : memref<10xf32>
  return %v3 : f32
// CHECK:      %{{.*}} = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT:   affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK-NEXT:     %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT:     %{{.*}} = affine.apply [[$MAP4]](%{{.*}})
// CHECK-NEXT:     affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT:   }
// CHECK-NEXT: }
// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: return %{{.*}} : f32
}

// CHECK-LABEL: func @should_not_fwd
func @should_not_fwd(%A: memref<100xf32>, %M : index, %N : index) -> f32 {
  %cf = constant 0.0 : f32
  affine.store %cf, %A[%M] : memref<100xf32>
  // CHECK: affine.load %{{.*}}[%{{.*}}]
  %v = affine.load %A[%N] : memref<100xf32>
  return %v : f32
}

// The store can be forwarded to the load of %A[%j, %i], but not to the loads
// of %A[%i, %j], since the two references are not known to be equal.
// CHECK-LABEL: func @refs_not_known_to_be_equal
func @refs_not_known_to_be_equal(%A : memref<100 x 100 x f32>, %M : index) {
  %N = affine.apply affine_map<(d0) -> (d0 + 1)> (%M)
  %cf1 = constant 1.0 : f32
  affine.for %i = 0 to 100 {
    // CHECK: affine.for %[[I:.*]] =
    affine.for %j = 0 to 100 {
      // CHECK: affine.for %[[J:.*]] =
      // CHECK: affine.load %{{.*}}[%[[I]], %[[J]]]
      %u = affine.load %A[%i, %j] : memref<100x100xf32>
      // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[J]], %[[I]]]
      affine.store %cf1, %A[%j, %i] : memref<100x100xf32>
      // CHECK-NEXT: affine.load %{{.*}}[%[[I]], %[[J]]]
      %v = affine.load %A[%i, %j] : memref<100x100xf32>
      // This load should disappear.
      %w = affine.load %A[%j, %i] : memref<100x100xf32>
      // CHECK-NEXT: "foo"
      "foo" (%u, %v, %w) : (f32, f32, f32) -> ()
    }
  }
  return
}

// This test checks value forwarding from vector stores to vector loads. The
// value loaded from %in can be stored directly to %out by eliminating the
// store to and load from %tmp.
func @vector_forwarding(%in : memref<512xf32>, %out : memref<512xf32>) {
  %tmp = alloc() : memref<512xf32>
  affine.for %i = 0 to 16 {
    %ld0 = affine.vector_load %in[32*%i] : memref<512xf32>, vector<32xf32>
    affine.vector_store %ld0, %tmp[32*%i] : memref<512xf32>, vector<32xf32>
    %ld1 = affine.vector_load %tmp[32*%i] : memref<512xf32>, vector<32xf32>
    affine.vector_store %ld1, %out[32*%i] : memref<512xf32>, vector<32xf32>
  }
  return
}

// CHECK-LABEL: func @vector_forwarding
// CHECK:      affine.for %{{.*}} = 0 to 16 {
// CHECK-NEXT:   %[[LDVAL:.*]] = affine.vector_load
// CHECK-NEXT:   affine.vector_store %[[LDVAL]],{{.*}}
// CHECK-NEXT: }