1// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL
2// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
3// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
4// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
5
6// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
7// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
8// UNROLL-FULL-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0) -> (d0 + 3)>
9// UNROLL-FULL-DAG: [[$MAP3:#map[0-9]+]] = affine_map<(d0) -> (d0 + 4)>
10// UNROLL-FULL-DAG: [[$MAP4:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + 1)>
11// UNROLL-FULL-DAG: [[$MAP5:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + 3)>
12// UNROLL-FULL-DAG: [[$MAP6:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
13
14// SHORT-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
15
16// UNROLL-BY-4-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
17// UNROLL-BY-4-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
18// UNROLL-BY-4-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0) -> (d0 + 3)>
19// UNROLL-BY-4-DAG: [[$MAP3:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + 1)>
20// UNROLL-BY-4-DAG: [[$MAP4:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + 3)>
21// UNROLL-BY-4-DAG: [[$MAP5:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
22// UNROLL-BY-4-DAG: [[$MAP6:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 16 + d1)>
23// UNROLL-BY-4-DAG: [[$MAP11:#map[0-9]+]] = affine_map<(d0) -> (d0)>
24// UNROLL-BY-4-DAG: [[$MAP_TRIP_COUNT_MULTIPLE_FOUR:#map[0-9]+]] = affine_map<()[s0, s1, s2] -> (s0 + ((-s0 + s1) floordiv 4) * 4, s0 + ((-s0 + s2) floordiv 4) * 4, s0 + ((-s0) floordiv 4) * 4 + 1024)>
25
// Simplest case: the inner loop has a constant trip count of 4 and does not
// use its induction variable, so full unrolling yields four copies of the
// body. The outer loop is expected to stay.
26// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
27func @loop_nest_simplest() {
28  // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
29  affine.for %i = 0 to 100 step 2 {
30    // UNROLL-FULL: %c1_i32 = constant 1 : i32
31    // UNROLL-FULL-NEXT: %c1_i32_0 = constant 1 : i32
32    // UNROLL-FULL-NEXT: %c1_i32_1 = constant 1 : i32
33    // UNROLL-FULL-NEXT: %c1_i32_2 = constant 1 : i32
34    affine.for %j = 0 to 4 {
35      %x = constant 1 : i32
36    }
37  }       // UNROLL-FULL:  }
38  return  // UNROLL-FULL:  return
39}         // UNROLL-FULL: }
40
// The inner induction variable is used by the body. The expected output uses
// a constant 0 for the first copy and affine.apply results (d0 + 1, d0 + 2,
// d0 + 3) for the remaining three copies.
41// UNROLL-FULL-LABEL: func @loop_nest_simple_iv_use() {
42func @loop_nest_simple_iv_use() {
43  // UNROLL-FULL: %c0 = constant 0 : index
44  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 step 2 {
45  affine.for %i = 0 to 100 step 2 {
46    // UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
47    // UNROLL-FULL: %1 = affine.apply [[$MAP0]](%c0)
48    // UNROLL-FULL-NEXT:  %2 = "addi32"(%1, %1) : (index, index) -> i32
49    // UNROLL-FULL: %3 = affine.apply [[$MAP1]](%c0)
50    // UNROLL-FULL-NEXT:  %4 = "addi32"(%3, %3) : (index, index) -> i32
51    // UNROLL-FULL: %5 = affine.apply [[$MAP2]](%c0)
52    // UNROLL-FULL-NEXT:  %6 = "addi32"(%5, %5) : (index, index) -> i32
53    affine.for %j = 0 to 4 {
54      %x = "addi32"(%j, %j) : (index, index) -> i32
55    }
56  }       // UNROLL-FULL:  }
57  return  // UNROLL-FULL:  return
58}         // UNROLL-FULL: }
59
60// Operations in the loop body have results that are used therein.
// Each unrolled copy must use its own clone of the affine.apply result, while
// the value defined outside the inner loop (%c0 in the input) is shared by
// all copies.
61// UNROLL-FULL-LABEL: func @loop_nest_body_def_use() {
62func @loop_nest_body_def_use() {
63  // UNROLL-FULL: %c0 = constant 0 : index
64  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 step 2 {
65  affine.for %i = 0 to 100 step 2 {
66    // UNROLL-FULL: %c0_0 = constant 0 : index
67    %c0 = constant 0 : index
68    // UNROLL-FULL:      %0 = affine.apply [[$MAP0]](%c0)
69    // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %c0_0) : (index, index) -> index
70    // UNROLL-FULL-NEXT: %2 = affine.apply [[$MAP0]](%c0)
71    // UNROLL-FULL-NEXT: %3 = affine.apply [[$MAP0]](%2)
72    // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %c0_0) : (index, index) -> index
73    // UNROLL-FULL-NEXT: %5 = affine.apply [[$MAP1]](%c0)
74    // UNROLL-FULL-NEXT: %6 = affine.apply [[$MAP0]](%5)
75    // UNROLL-FULL-NEXT: %7 = "addi32"(%6, %c0_0) : (index, index) -> index
76    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP2]](%c0)
77    // UNROLL-FULL-NEXT: %9 = affine.apply [[$MAP0]](%8)
78    // UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
79    affine.for %j = 0 to 4 {
80      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
81        (index) -> (index)
82      %y = "addi32"(%x, %c0) : (index, index) -> index
83    }
84  }       // UNROLL-FULL:  }
85  return  // UNROLL-FULL:  return
86}         // UNROLL-FULL: }
87
// Inner loops with a non-unit step. `2 to 6 step 2` runs two iterations and
// `2 to 7 step 2` runs three; the expected output has two and three copies
// of the respective bodies, with later copies offset by multiples of the step.
88// UNROLL-FULL-LABEL: func @loop_nest_strided() {
89func @loop_nest_strided() {
90  // UNROLL-FULL: %c2 = constant 2 : index
91  // UNROLL-FULL-NEXT: %c2_0 = constant 2 : index
92  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 {
93  affine.for %i = 0 to 100 {
94    // UNROLL-FULL:      %0 = affine.apply [[$MAP0]](%c2_0)
95    // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
96    // UNROLL-FULL-NEXT: %2 = affine.apply [[$MAP1]](%c2_0)
97    // UNROLL-FULL-NEXT: %3 = affine.apply [[$MAP0]](%2)
98    // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
99    affine.for %j = 2 to 6 step 2 {
100      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
101        (index) -> (index)
102      %y = "addi32"(%x, %x) : (index, index) -> index
103    }
104    // UNROLL-FULL:      %5 = affine.apply [[$MAP0]](%c2)
105    // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
106    // UNROLL-FULL-NEXT: %7 = affine.apply [[$MAP1]](%c2)
107    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP0]](%7)
108    // UNROLL-FULL-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> index
109    // UNROLL-FULL-NEXT: %10 = affine.apply [[$MAP3]](%c2)
110    // UNROLL-FULL-NEXT: %11 = affine.apply [[$MAP0]](%10)
111    // UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
112    affine.for %k = 2 to 7 step 2 {
113      %z = "affine.apply" (%k) { map = affine_map<(d0) -> (d0 + 1)> } :
114        (index) -> (index)
115      %w = "addi32"(%z, %z) : (index, index) -> index
116    }
117  }       // UNROLL-FULL:  }
118  return  // UNROLL-FULL:  return
119}         // UNROLL-FULL: }
120
// The body contains an op with two results ("fma") and affine.apply ops that
// take both the outer and the inner induction variable. The inner loop has
// two iterations, so the expected output holds two copies of the body.
121// UNROLL-FULL-LABEL: func @loop_nest_multiple_results() {
122func @loop_nest_multiple_results() {
123  // UNROLL-FULL: %c0 = constant 0 : index
124  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 {
125  affine.for %i = 0 to 100 {
126    // UNROLL-FULL: %0 = affine.apply [[$MAP4]](%arg0, %c0)
127    // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
128    // UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%arg0, %c0)
129    // UNROLL-FULL-NEXT: %3:2 = "fma"(%2, %0, %0) : (index, index, index) -> (index, index)
130    // UNROLL-FULL-NEXT: %4 = affine.apply #map{{.*}}(%c0)
131    // UNROLL-FULL-NEXT: %5 = affine.apply #map{{.*}}(%arg0, %4)
132    // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
133    // UNROLL-FULL-NEXT: %7 = affine.apply #map{{.*}}(%arg0, %4)
134    // UNROLL-FULL-NEXT: %8:2 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
135    affine.for %j = 0 to 2 step 1 {
136      %x = affine.apply affine_map<(d0, d1) -> (d0 + 1)> (%i, %j)
137      %y = "addi32"(%x, %x) : (index, index) -> index
138      %z = affine.apply affine_map<(d0, d1) -> (d0 + 3)> (%i, %j)
139      %w:2 = "fma"(%z, %x, %x) : (index, index, index) -> (index, index)
140    }
141  }       // UNROLL-FULL:  }
142  return  // UNROLL-FULL:  return
143}         // UNROLL-FULL: }
144
145
146// Imperfect loop nest. Unrolling innermost here yields a perfect nest.
// The ops before ("vld") and after ("scale", "vst") the inner loop must stay
// in place around the four unrolled copies.
147// UNROLL-FULL-LABEL: func @loop_nest_seq_imperfect(%arg0: memref<128x128xf32>) {
148func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
149  // UNROLL-FULL: %c0 = constant 0 : index
150  // UNROLL-FULL-NEXT: %c128 = constant 128 : index
151  %c128 = constant 128 : index
152  // UNROLL-FULL: affine.for %arg1 = 0 to 100 {
153  affine.for %i = 0 to 100 {
154    // UNROLL-FULL: %0 = "vld"(%arg1) : (index) -> i32
155    %ld = "vld"(%i) : (index) -> i32
156    // UNROLL-FULL: %1 = affine.apply [[$MAP0]](%c0)
157    // UNROLL-FULL-NEXT: %2 = "vmulf"(%c0, %1) : (index, index) -> index
158    // UNROLL-FULL-NEXT: %3 = "vaddf"(%2, %2) : (index, index) -> index
159    // UNROLL-FULL-NEXT: %4 = affine.apply [[$MAP0]](%c0)
160    // UNROLL-FULL-NEXT: %5 = affine.apply [[$MAP0]](%4)
161    // UNROLL-FULL-NEXT: %6 = "vmulf"(%4, %5) : (index, index) -> index
162    // UNROLL-FULL-NEXT: %7 = "vaddf"(%6, %6) : (index, index) -> index
163    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP1]](%c0)
164    // UNROLL-FULL-NEXT: %9 = affine.apply [[$MAP0]](%8)
165    // UNROLL-FULL-NEXT: %10 = "vmulf"(%8, %9) : (index, index) -> index
166    // UNROLL-FULL-NEXT: %11 = "vaddf"(%10, %10) : (index, index) -> index
167    // UNROLL-FULL-NEXT: %12 = affine.apply [[$MAP2]](%c0)
168    // UNROLL-FULL-NEXT: %13 = affine.apply [[$MAP0]](%12)
169    // UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
170    // UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
171    affine.for %j = 0 to 4 {
172      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
173        (index) -> (index)
174       %y = "vmulf"(%j, %x) : (index, index) -> index
175       %z = "vaddf"(%y, %y) : (index, index) -> index
176    }
177    // UNROLL-FULL: %16 = "scale"(%c128, %arg1) : (index, index) -> index
178    %addr = "scale"(%c128, %i) : (index, index) -> index
179    // UNROLL-FULL: "vst"(%16, %arg1) : (index, index) -> ()
180    "vst"(%addr, %i) : (index, index) -> ()
181  }       // UNROLL-FULL: }
182  return  // UNROLL-FULL:  return
183}
184
// Two loop nests in sequence. The first is a top-level loop with trip count
// 4 that is unrolled straight into the function body. The second is a 2-deep
// nest whose inner loop (trip count 4) is unrolled and whose body also uses a
// symbol operand (%k).
185// UNROLL-FULL-LABEL: func @loop_nest_seq_multiple() {
186func @loop_nest_seq_multiple() {
187  // UNROLL-FULL: %c0 = constant 0 : index
188  // UNROLL-FULL-NEXT: %c0_0 = constant 0 : index
189  // UNROLL-FULL-NEXT: %0 = affine.apply [[$MAP0]](%c0_0)
190  // UNROLL-FULL-NEXT: "mul"(%0, %0) : (index, index) -> ()
191  // UNROLL-FULL-NEXT: %1 = affine.apply [[$MAP0]](%c0_0)
192  // UNROLL-FULL-NEXT: %2 = affine.apply [[$MAP0]](%1)
193  // UNROLL-FULL-NEXT: "mul"(%2, %2) : (index, index) -> ()
194  // UNROLL-FULL-NEXT: %3 = affine.apply [[$MAP1]](%c0_0)
195  // UNROLL-FULL-NEXT: %4 = affine.apply [[$MAP0]](%3)
196  // UNROLL-FULL-NEXT: "mul"(%4, %4) : (index, index) -> ()
197  // UNROLL-FULL-NEXT: %5 = affine.apply [[$MAP2]](%c0_0)
198  // UNROLL-FULL-NEXT: %6 = affine.apply [[$MAP0]](%5)
199  // UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> ()
200  affine.for %j = 0 to 4 {
201    %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
202      (index) -> (index)
203    "mul"(%x, %x) : (index, index) -> ()
204  }
205
206  // UNROLL-FULL: %c99 = constant 99 : index
207  %k = constant 99 : index
208  // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
209  affine.for %m = 0 to 100 step 2 {
210    // UNROLL-FULL: %7 = affine.apply [[$MAP0]](%c0)
211    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP6]](%c0)[%c99]
212    // UNROLL-FULL-NEXT: %9 = affine.apply [[$MAP0]](%c0)
213    // UNROLL-FULL-NEXT: %10 = affine.apply [[$MAP0]](%9)
214    // UNROLL-FULL-NEXT: %11 = affine.apply [[$MAP6]](%9)[%c99]
215    // UNROLL-FULL-NEXT: %12 = affine.apply [[$MAP1]](%c0)
216    // UNROLL-FULL-NEXT: %13 = affine.apply [[$MAP0]](%12)
217    // UNROLL-FULL-NEXT: %14 = affine.apply [[$MAP6]](%12)[%c99]
218    // UNROLL-FULL-NEXT: %15 = affine.apply [[$MAP2]](%c0)
219    // UNROLL-FULL-NEXT: %16 = affine.apply [[$MAP0]](%15)
220    // UNROLL-FULL-NEXT: %17 = affine.apply [[$MAP6]](%15)[%c99]
221    affine.for %n = 0 to 4 {
222      %y = "affine.apply" (%n) { map = affine_map<(d0) -> (d0 + 1)> } :
223        (index) -> (index)
224      %z = "affine.apply" (%n, %k) { map = affine_map<(d0) [s0] -> (d0 + s0 + 1)> } :
225        (index, index) -> (index)
226    }     // inner loop is fully unrolled; no matching brace in the output
227  }       // UNROLL-FULL: }
228  return  // UNROLL-FULL:  return
229}         // UNROLL-FULL: }
230
// Single-iteration loop whose body does not use the induction variable. The
// expected output has the two body ops directly after the function header,
// with no loop left.
231// UNROLL-FULL-LABEL: func @loop_nest_unroll_full() {
232func @loop_nest_unroll_full() {
233  // UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32
234  // UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32
235  // UNROLL-FULL-NEXT:  return
236  affine.for %i = 0 to 1 {
237    %x = "foo"() : () -> i32
238    %y = "bar"() : () -> i32
239  }
240  return
241} // UNROLL-FULL: }
242
// With the full-unroll threshold set to 2, only the outer loop (trip count 2)
// qualifies. The inner loop (trip count 4) is kept and appears twice in the
// expected output, once per unrolled outer iteration.
243// SHORT-LABEL: func @loop_nest_outer_unroll() {
244func @loop_nest_outer_unroll() {
245  // SHORT:      affine.for %arg0 = 0 to 4 {
246  // SHORT-NEXT:   %0 = affine.apply [[$MAP0]](%arg0)
247  // SHORT-NEXT:   %1 = "addi32"(%0, %0) : (index, index) -> index
248  // SHORT-NEXT: }
249  // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
250  // SHORT-NEXT:   %0 = affine.apply [[$MAP0]](%arg0)
251  // SHORT-NEXT:   %1 = "addi32"(%0, %0) : (index, index) -> index
252  // SHORT-NEXT: }
253  affine.for %i = 0 to 2 {
254    affine.for %j = 0 to 4 {
255      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
256        (index) -> (index)
257      %y = "addi32"(%x, %x) : (index, index) -> index
258    }
259  }
260  return  // SHORT:  return
261}         // SHORT: }
262
263// We are doing a minimal FileCheck here. We just need this test case to
264// successfully run. Both %x and %y will get unrolled here as the min trip
265// count threshold is set to 2.
// The directives inside this function use the same prefix as the label so
// that they are actually evaluated by the threshold=2 RUN line.
266// SHORT-LABEL: func @loop_nest_seq_long() -> i32 {
267func @loop_nest_seq_long() -> i32 {
268  %A = alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
269  %B = alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
270  %C = alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
271
272  %zero = constant 0 : i32
273  %one = constant 1 : i32
274  %two = constant 2 : i32
275
276  %zero_idx = constant 0 : index
277
278  // SHORT: affine.for %arg0 = 0 to 512
279  affine.for %n0 = 0 to 512 {
280    // SHORT: affine.for %arg1 = 0 to 8
281    affine.for %n1 = 0 to 8 {
282      store %one,  %A[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
283      store %two,  %B[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
284      store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
285    }
286  }
287
288  affine.for %x = 0 to 2 {
289    affine.for %y = 0 to 2 {
290      // SHORT: affine.for
291      affine.for %arg2 = 0 to 8 {
292        // SHORT-NOT: affine.for
293        // SHORT: %{{[0-9]+}} = affine.apply
294        %b2 = "affine.apply" (%y, %arg2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
295        %z = load %B[%x, %b2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
296        "op1"(%z) : (i32) -> ()
297      }
298      affine.for %j1 = 0 to 8 {
299        affine.for %j2 = 0 to 8 {
300          %a2 = "affine.apply" (%y, %j2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
301          %v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
302          "op2"(%v203) : (i32) -> ()
303        }
304        affine.for %k2 = 0 to 8 {
305          %s0 = "op3"() : () -> i32
306          %c2 = "affine.apply" (%x, %k2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
307          %s1 =  load %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
308          %s2 = "addi32"(%s0, %s1) : (i32, i32) -> i32
309          store %s2, %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
310        }
311      }
312      "op4"() : () -> ()
313    }
314  }
315  %ret = load %C[%zero_idx, %zero_idx] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
316  return %ret : i32
317}
318
// Unit-stride inner loop with trip count 8, a multiple of the unroll factor
// 4. The expected output has a single `step 4` loop with four body copies and
// no cleanup loop. The empty loop %k is expected to appear unchanged.
319// UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
320func @unroll_unit_stride_no_cleanup() {
321  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
322  affine.for %i = 0 to 100 {
323    // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
324    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
325    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
326    // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]+}}([[L1]])
327    // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
328    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
329    // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]+}}([[L1]])
330    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
331    // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
332    // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]+}}([[L1]])
333    // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
334    // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
335    // UNROLL-BY-4-NEXT: }
336    affine.for %j = 0 to 8 {
337      %x = "addi32"(%j, %j) : (index, index) -> i32
338      %y = "addi32"(%x, %x) : (i32, i32) -> i32
339    }
340    // empty loop
341    // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
342    affine.for %k = 0 to 8 {
343    }
344  }
345  return
346}
347
// Unit-stride inner loop with trip count 10, not a multiple of 4. The
// expected output has a main loop `0 to 8 step 4` followed by a cleanup loop
// `8 to 10` that holds a single copy of the body.
348// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
349func @unroll_unit_stride_cleanup() {
350  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
351  affine.for %i = 0 to 100 {
352    // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
353    // UNROLL-BY-4-NEXT:   %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
354    // UNROLL-BY-4-NEXT:   %1 = "addi32"(%0, %0) : (i32, i32) -> i32
355    // UNROLL-BY-4-NEXT:   %2 = affine.apply #map{{[0-9]+}}([[L1]])
356    // UNROLL-BY-4-NEXT:   %3 = "addi32"(%2, %2) : (index, index) -> i32
357    // UNROLL-BY-4-NEXT:   %4 = "addi32"(%3, %3) : (i32, i32) -> i32
358    // UNROLL-BY-4-NEXT:   %5 = affine.apply #map{{[0-9]+}}([[L1]])
359    // UNROLL-BY-4-NEXT:   %6 = "addi32"(%5, %5) : (index, index) -> i32
360    // UNROLL-BY-4-NEXT:   %7 = "addi32"(%6, %6) : (i32, i32) -> i32
361    // UNROLL-BY-4-NEXT:   %8 = affine.apply #map{{[0-9]+}}([[L1]])
362    // UNROLL-BY-4-NEXT:   %9 = "addi32"(%8, %8) : (index, index) -> i32
363    // UNROLL-BY-4-NEXT:   %10 = "addi32"(%9, %9) : (i32, i32) -> i32
364    // UNROLL-BY-4-NEXT: }
365    // UNROLL-BY-4-NEXT: for [[L2:%arg[0-9]+]] = 8 to 10 {
366    // UNROLL-BY-4-NEXT:   %0 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
367    // UNROLL-BY-4-NEXT:   %1 = "addi32"(%0, %0) : (i32, i32) -> i32
368    // UNROLL-BY-4-NEXT: }
369    affine.for %j = 0 to 10 {
370      %x = "addi32"(%j, %j) : (index, index) -> i32
371      %y = "addi32"(%x, %x) : (i32, i32) -> i32
372    }
373  }
374  return
375}
376
// Inner loop `2 to 48 step 5` (ten iterations). The expected main loop is
// `2 to 42 step 20` (step scaled by the unroll factor) and the cleanup loop
// `42 to 48 step 5` keeps the original step.
377// UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
378func @unroll_non_unit_stride_cleanup() {
379  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
380  affine.for %i = 0 to 100 {
381    // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 2 to 42 step 20 {
382    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
383    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
384    // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]+}}([[L1]])
385    // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
386    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
387    // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]+}}([[L1]])
388    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
389    // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
390    // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]+}}([[L1]])
391    // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
392    // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
393    // UNROLL-BY-4-NEXT: }
394    // UNROLL-BY-4-NEXT: for [[L2:%arg[0-9]+]] = 42 to 48 step 5 {
395    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
396    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
397    // UNROLL-BY-4-NEXT: }
398    affine.for %j = 2 to 48 step 5 {
399      %x = "addi32"(%j, %j) : (index, index) -> i32
400      %y = "addi32"(%x, %x) : (i32, i32) -> i32
401    }
402  }
403  return
404}
405
406// Both the unrolled loop and the cleanup loop are single iteration loops.
// The inner loop has trip count 5: the unrolled-by-4 part covers iterations
// 0..3 and the cleanup covers iteration 4. The expected output contains no
// inner loop at all; the bodies appear inline and use constants %c0 and %c4.
407// UNROLL-BY-4-LABEL: func @loop_nest_single_iteration_after_unroll
408func @loop_nest_single_iteration_after_unroll(%N: index) {
409  // UNROLL-BY-4: %c0 = constant 0 : index
410  // UNROLL-BY-4: %c4 = constant 4 : index
411  // UNROLL-BY-4: affine.for %arg1 = 0 to %arg0 {
412  affine.for %i = 0 to %N {
413    // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
414    // UNROLL-BY-4-NEXT: %1 = affine.apply [[$MAP0]](%c0)
415    // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
416    // UNROLL-BY-4-NEXT: %3 = affine.apply [[$MAP1]](%c0)
417    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
418    // UNROLL-BY-4-NEXT: %5 = affine.apply [[$MAP2]](%c0)
419    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
420    // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
421    // UNROLL-BY-4-NOT: for
422    affine.for %j = 0 to 5 {
423      %x = "addi32"(%j, %j) : (index, index) -> i32
424    } // UNROLL-BY-4-NOT: }
425  } // UNROLL-BY-4:  }
426  return
427}
428
429// Test cases with loop bound operands.
430
431// No cleanup will be generated here.
// The inner upper bound is `d0 - d0 mod 4` applied to the outer IV, which is
// always a multiple of 4, and the lower bound is 0.
432// UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
433func @loop_nest_operand1() {
434// UNROLL-BY-4:      affine.for %arg0 = 0 to 100 step 2 {
435// UNROLL-BY-4-NEXT:   affine.for %arg1 = 0 to #map{{[0-9]+}}(%arg0) step 4
436// UNROLL-BY-4-NEXT:      %0 = "foo"() : () -> i32
437// UNROLL-BY-4-NEXT:      %1 = "foo"() : () -> i32
438// UNROLL-BY-4-NEXT:      %2 = "foo"() : () -> i32
439// UNROLL-BY-4-NEXT:      %3 = "foo"() : () -> i32
440// UNROLL-BY-4-NEXT:   }
441// UNROLL-BY-4-NEXT: }
442// UNROLL-BY-4-NEXT: return
443  affine.for %i = 0 to 100 step 2 {
444    affine.for %j = 0 to affine_map<(d0) -> (d0 - d0 mod 4)> (%i) {
445      %x = "foo"() : () -> i32
446    }
447  }
448  return
449}
450
451// No cleanup will be generated here.
// Both bounds depend on the outer IV: the range is d0 to 5*d0 + 4, so the
// trip count is 4*d0 + 4, a multiple of 4 for every d0.
452// UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
453func @loop_nest_operand2() {
454// UNROLL-BY-4:      affine.for %arg0 = 0 to 100 step 2 {
455// UNROLL-BY-4-NEXT:   affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]+}}(%arg0) step 4 {
456// UNROLL-BY-4-NEXT:     %0 = "foo"() : () -> i32
457// UNROLL-BY-4-NEXT:     %1 = "foo"() : () -> i32
458// UNROLL-BY-4-NEXT:     %2 = "foo"() : () -> i32
459// UNROLL-BY-4-NEXT:     %3 = "foo"() : () -> i32
460// UNROLL-BY-4-NEXT:   }
461// UNROLL-BY-4-NEXT: }
462// UNROLL-BY-4-NEXT: return
463  affine.for %i = 0 to 100 step 2 {
464    affine.for %j = affine_map<(d0) -> (d0)> (%i) to affine_map<(d0) -> (5*d0 + 4)> (%i) {
465      %x = "foo"() : () -> i32
466    }
467  }
468  return
469}
470
471// Difference between loop bounds is constant, but not a multiple of unroll
472// factor. The cleanup loop happens to be a single iteration one and is promoted.
// The range is d0 to d0 + 9, i.e. nine iterations: eight go to the unrolled
// loop and the remaining one is expected inline after it.
473// UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
474func @loop_nest_operand3() {
475  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
476  affine.for %i = 0 to 100 step 2 {
477    // UNROLL-BY-4: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]+}}(%arg0) step 4 {
478    // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
479    // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
480    // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
481    // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
482    // UNROLL-BY-4-NEXT: }
483    // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
484    affine.for %j = affine_map<(d0) -> (d0)> (%i) to affine_map<(d0) -> (d0 + 9)> (%i) {
485      %x = "foo"() : () -> i32
486    }
487  } // UNROLL-BY-4: }
488  return
489}
490
// The inner upper bound is the function argument %N, so the trip count is
// unknown at compile time. The expected output has an unrolled loop whose
// upper bound is a map of %N, followed by a cleanup loop that runs up to %N.
491// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) {
492func @loop_nest_symbolic_bound(%N : index) {
493  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
494  affine.for %i = 0 to 100 {
495    // UNROLL-BY-4: affine.for %arg2 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
496    // UNROLL-BY-4: %0 = "foo"() : () -> i32
497    // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
498    // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
499    // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
500    // UNROLL-BY-4-NEXT: }
501    // A cleanup loop will be generated here.
502    // UNROLL-BY-4-NEXT: affine.for %arg2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
503    // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
504    // UNROLL-BY-4-NEXT: }
505    affine.for %j = 0 to %N {
506      %x = "foo"() : () -> i32
507    }
508  }
509  return
510}
511
// Symbolic upper bound %N combined with a non-unit step of 3. The expected
// unrolled loop has step 12 (3 * 4) and the cleanup loop keeps step 3.
512// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound_with_step
513// UNROLL-BY-4-SAME: %[[N:.*]]: index
514func @loop_nest_symbolic_bound_with_step(%N : index) {
515  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
516  affine.for %i = 0 to 100 {
517    affine.for %j = 0 to %N step 3 {
518      %x = "foo"() : () -> i32
519    }
520// UNROLL-BY-4:      affine.for %{{.*}} = 0 to #map{{[0-9]+}}()[%[[N]]] step 12 {
521// UNROLL-BY-4:        "foo"()
522// UNROLL-BY-4-NEXT:   "foo"()
523// UNROLL-BY-4-NEXT:   "foo"()
524// UNROLL-BY-4-NEXT:   "foo"()
525// UNROLL-BY-4-NEXT: }
526// A cleanup loop will be generated here.
527// UNROLL-BY-4-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}()[%[[N]]] to %[[N]] step 3 {
528// UNROLL-BY-4-NEXT:   "foo"()
529// UNROLL-BY-4-NEXT: }
530  }
531  return
532}
533
// Symbolic lower bound %M and a `min` upper bound over (%N, %K, 1024). The
// unrolled loop's upper bound and the cleanup loop's lower bound are both
// expected to use the three-result map captured at the top of the file as
// MAP_TRIP_COUNT_MULTIPLE_FOUR, with operands (%M, %N, %K).
// NOTE(review): these directives were previously under a prefix that no RUN
// line enables, so they were never evaluated; confirm against actual output.
534// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
535func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
536  affine.for %i = %M to min affine_map<()[s0, s1] -> (s0, s1, 1024)>()[%N, %K] {
537    "foo"() : () -> ()
538  }
539  return
540}
541// UNROLL-BY-4:       affine.for %arg{{[0-9]+}} = %arg0 to min [[$MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
542// UNROLL-BY-4-NEXT:    "foo"() : () -> ()
543// UNROLL-BY-4-NEXT:    "foo"() : () -> ()
544// UNROLL-BY-4-NEXT:    "foo"() : () -> ()
545// UNROLL-BY-4-NEXT:    "foo"() : () -> ()
546// UNROLL-BY-4-NEXT:  }
547// UNROLL-BY-4-NEXT:  affine.for %arg{{[0-9]+}} = max [[$MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map{{[0-9]+}}()[%arg1, %arg2] {
548// UNROLL-BY-4-NEXT:    "foo"() : () -> ()
549// UNROLL-BY-4-NEXT:  }
550// UNROLL-BY-4-NEXT:  return
551
552// The trip count here is a multiple of four, but this can be inferred only
553// through composition. Check for no cleanup loop.
// %K is (4*%M + 1) - 1 = 4*%M once the two affine.apply ops are composed, so
// the results 4*d0 and 1024 are multiples of four and d1 is one after
// composition.
554// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound
555func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
556  %T = affine.apply affine_map<(d0) -> (4*d0 + 1)>(%M)
557  %K = affine.apply affine_map<(d0) -> (d0 - 1)> (%T)
558  affine.for %i = 0 to min affine_map<(d0, d1) -> (4 * d0, d1, 1024)>(%N, %K) {
559    "foo"() : () -> ()
560  }
561  return
562}
563// UNROLL-BY-4: affine.for %arg2 = 0 to min
564// UNROLL-BY-4-NOT: for
565// UNROLL-BY-4: return
566
// Variant of the previous test using symbols instead of dims in the `min`
// upper bound, with %K = 4*%M produced by a single affine.apply. The
// upper-bound results are 4*%N, %K and 1024, so no cleanup loop is expected
// after the four unrolled copies.
567// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound_alt
568func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index) {
569  %K = affine.apply affine_map<(d0) -> (4*d0)> (%M)
570  affine.for %i = 0 to min affine_map<()[s0, s1] -> (4 * s0, s1, 1024)>()[%N, %K] {
571    "foo"() : () -> ()
572  }
573  // UNROLL-BY-4: affine.for %arg2 = 0 to min
574  // UNROLL-BY-4-NEXT: "foo"
575  // UNROLL-BY-4-NEXT: "foo"
576  // UNROLL-BY-4-NEXT: "foo"
577  // UNROLL-BY-4-NEXT: "foo"
578  // UNROLL-BY-4-NOT: for
579  // UNROLL-BY-4: return
580  return
581}
582
// With an unroll factor of 1, a loop that runs exactly once is expected to be
// removed: its body appears directly in the function and the induction
// variable is replaced by a constant 0.
583// UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
584func @unroll_by_one_should_promote_single_iteration_loop() {
585  affine.for %i = 0 to 1 {
586    %x = "foo"(%i) : (index) -> i32
587  }
588  return
589// UNROLL-BY-1-NEXT: %c0 = constant 0 : index
590// UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
591// UNROLL-BY-1-NEXT: return
592}
593