// RUN: mlir-opt %s -allow-unregistered-dialect -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s

// Test that the loop-fusion transformation fuses a producer loop over
// [0, 16) into a consumer loop over the smaller range [0, 5), slicing the
// producer down to the consumed iteration range.
// CHECK-LABEL: func @slice_depth1_loop_nest() {
func @slice_depth1_loop_nest() {
  %0 = alloc() : memref<100xf32>
  %cst = constant 7.000000e+00 : f32
  affine.for %i0 = 0 to 16 {
    affine.store %cst, %0[%i0] : memref<100xf32>
  }
  affine.for %i1 = 0 to 5 {
    %1 = affine.load %0[%i1] : memref<100xf32>
    "prevent.dce"(%1) : (f32) -> ()
  }
  // Use a wildcard for the operand of "prevent.dce": SSA numbering after
  // -canonicalize is an implementation detail and must not be pinned.
  // CHECK: affine.for %[[IV0:.*]] = 0 to 5 {
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV0]]] : memref<100xf32>
  // CHECK-NEXT: affine.load %{{.*}}[%[[IV0]]] : memref<100xf32>
  // CHECK-NEXT: "prevent.dce"(%{{.*}}) : (f32) -> ()
  // CHECK-NEXT: }
  // CHECK-NEXT: return
  return
}

// -----

// Test fusion of a reduction loop nest into a pointwise consumer of the
// reduction result.
// CHECK-LABEL: func @should_fuse_reduction_to_pointwise() {
func @should_fuse_reduction_to_pointwise() {
  %a = alloc() : memref<10x10xf32>
  %b = alloc() : memref<10xf32>
  %c = alloc() : memref<10xf32>

  %cf7 = constant 7.0 : f32

  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      %v0 = affine.load %b[%i0] : memref<10xf32>
      %v1 = affine.load %a[%i0, %i1] : memref<10x10xf32>
      %v3 = addf %v0, %v1 : f32
      affine.store %v3, %b[%i0] : memref<10xf32>
    }
  }
  affine.for %i2 = 0 to 10 {
    %v4 = affine.load %b[%i2] : memref<10xf32>
    affine.store %v4, %c[%i2] : memref<10xf32>
  }

  // Match on the fused loop nest. Should fuse in the entire inner loop on
  // %i1 from the source loop nest, as %i1 is not used in the access
  // function of the store/load on %b.
  // CHECK: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
  // CHECK-NEXT: addf %{{.*}}, %{{.*}} : f32
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: return
  return
}

// -----

// Test that fusion picks an order of pairwise fusions that avoids creating
// a dependence cycle, ultimately fusing all three loops into one.
// CHECK-LABEL: func @should_fuse_avoiding_dependence_cycle() {
func @should_fuse_avoiding_dependence_cycle() {
  %a = alloc() : memref<10xf32>
  %b = alloc() : memref<10xf32>
  %c = alloc() : memref<10xf32>

  %cf7 = constant 7.0 : f32

  // Set up the following dependences:
  // 1) loop0 -> loop1 on memref '%a'
  // 2) loop0 -> loop2 on memref '%b'
  // 3) loop1 -> loop2 on memref '%c'
  affine.for %i0 = 0 to 10 {
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %v0, %b[%i0] : memref<10xf32>
  }
  affine.for %i1 = 0 to 10 {
    affine.store %cf7, %a[%i1] : memref<10xf32>
    %v1 = affine.load %c[%i1] : memref<10xf32>
    "prevent.dce"(%v1) : (f32) -> ()
  }
  affine.for %i2 = 0 to 10 {
    %v2 = affine.load %b[%i2] : memref<10xf32>
    affine.store %v2, %c[%i2] : memref<10xf32>
  }
  // Fusing the first loop into the last would create a cycle:
  //   {1} <--> {0, 2}
  // However, we can avoid the dependence cycle if we first fuse loop0 into
  // loop1:
  //   {0, 1} --> {2}
  // Then fuse this loop nest with loop2:
  //   {0, 1, 2}
  //
  // CHECK: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: "prevent.dce"
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: return
  return
}