1// RUN: mlir-opt %s -allow-unregistered-dialect -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s
2
3// CHECK-LABEL: func @slice_depth1_loop_nest() {
func @slice_depth1_loop_nest() {
  %0 = alloc() : memref<100xf32>
  %cst = constant 7.000000e+00 : f32
  // Source nest: writes %0 over [0, 16).
  affine.for %i0 = 0 to 16 {
    affine.store %cst, %0[%i0] : memref<100xf32>
  }
  // Destination nest: reads %0 over [0, 5). Fusion slices the source loop
  // down to the destination's trip count of 5.
  affine.for %i1 = 0 to 5 {
    %1 = affine.load %0[%i1] : memref<100xf32>
    "prevent.dce"(%1) : (f32) -> ()
  }
  // Capture the fused load's result instead of matching the literal SSA
  // name %1, which is fragile against renumbering by -canonicalize.
  // CHECK:      affine.for %[[IV0:.*]] = 0 to 5 {
  // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[%[[IV0]]] : memref<100xf32>
  // CHECK-NEXT:   %[[LOAD:.*]] = affine.load %{{.*}}[%[[IV0]]] : memref<100xf32>
  // CHECK-NEXT:   "prevent.dce"(%[[LOAD]]) : (f32) -> ()
  // CHECK-NEXT: }
  // CHECK-NEXT: return
  return
}
22
23// -----
24
25// CHECK-LABEL: func @should_fuse_reduction_to_pointwise() {
func @should_fuse_reduction_to_pointwise() {
  // %mat is the 2-d input, %acc the per-row reduction buffer, and %out the
  // destination of the pointwise copy.
  %mat = alloc() : memref<10x10xf32>
  %acc = alloc() : memref<10xf32>
  %out = alloc() : memref<10xf32>

  %cf7 = constant 7.0 : f32

  // Row-wise sum reduction of %mat into %acc.
  affine.for %r = 0 to 10 {
    affine.for %c = 0 to 10 {
      %partial = affine.load %acc[%r] : memref<10xf32>
      %elem = affine.load %mat[%r, %c] : memref<10x10xf32>
      %sum = addf %partial, %elem : f32
      affine.store %sum, %acc[%r] : memref<10xf32>
    }
  }
  // Pointwise copy of the reduced values into %out.
  affine.for %p = 0 to 10 {
    %red = affine.load %acc[%p] : memref<10xf32>
    affine.store %red, %out[%p] : memref<10xf32>
  }

  // Match on the fused loop nest. The entire inner reduction loop can be
  // fused into the pointwise loop, because the inner IV does not appear in
  // the access functions of the store/load on the reduction buffer.
  // CHECK:       affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:    affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:      affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:      affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
  // CHECK-NEXT:      addf %{{.*}}, %{{.*}} : f32
  // CHECK-NEXT:      affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:    }
  // CHECK-NEXT:    affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:    affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:  }
  // CHECK-NEXT:  return
  return
}
62
63// -----
64
65// CHECK-LABEL: func @should_fuse_avoiding_dependence_cycle() {
func @should_fuse_avoiding_dependence_cycle() {
  %a = alloc() : memref<10xf32>
  %b = alloc() : memref<10xf32>
  %c = alloc() : memref<10xf32>

  %cf7 = constant 7.0 : f32

  // Set up the following dependences:
  // 1) loop0 -> loop1 on memref '%a' (loop0 reads %a before loop1 writes it)
  // 2) loop0 -> loop2 on memref '%b' (loop0 writes %b, loop2 reads it)
  // 3) loop1 -> loop2 on memref '%c' (loop1 reads %c before loop2 writes it)
  affine.for %i0 = 0 to 10 {
    %v0 = affine.load %a[%i0] : memref<10xf32>
    affine.store %v0, %b[%i0] : memref<10xf32>
  }
  affine.for %i1 = 0 to 10 {
    affine.store %cf7, %a[%i1] : memref<10xf32>
    %v1 = affine.load %c[%i1] : memref<10xf32>
    "prevent.dce"(%v1) : (f32) -> ()
  }
  affine.for %i2 = 0 to 10 {
    %v2 = affine.load %b[%i2] : memref<10xf32>
    affine.store %v2, %c[%i2] : memref<10xf32>
  }
  // Fusing the first loop into the last would create a dependence cycle:
  //   {1} <--> {0, 2}
  // However, we can avoid the dependence cycle if we first fuse loop0 into
  // loop1:
  //   {0, 1} --> {2}
  // Then fuse this loop nest with loop2:
  //   {0, 1, 2}
  //
  // CHECK:      affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:   affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:   affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:   "prevent.dce"
  // CHECK-NEXT:   affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: return
  return
}
110