1// RUN: mlir-opt %s -linalg-fold-unit-extent-dims -split-input-file | FileCheck %s
2
// Copies a rank-3 tensor into a rank-5 tensor with a five-loop generic op.
// The result type has static extent 1 at positions 1 and 3; the result
// indexing map ties those positions to loops d2 and d3.
#copy_maps = [
  affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d4)>,
  affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d1, d3, d4)>
]

#copy_attrs = {
  indexing_maps = #copy_maps,
  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
  library_call = "some_external_func"
}

func @drop_one_trip_loops(%src: tensor<?x1x?xf32>) -> tensor<?x1x?x1x?xf32> {
  %res = linalg.generic #copy_attrs
      ins(%src : tensor<?x1x?xf32>) {
    ^bb0(%elem: f32):
      linalg.yield %elem : f32
  } -> tensor<?x1x?x1x?xf32>
  return %res : tensor<?x1x?x1x?xf32>
}
// Expected output: a reshape of the operand, a generic op over three parallel
// loops, and a reshape of the generic result.
//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d2)>
//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
//   CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//   CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1)>
//   CHECK-DAG: #[[$MAP5:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d2, d3)>
//   CHECK-DAG: #[[$MAP6:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d4)>
// CHECK-LABEL: func @drop_one_trip_loops
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[$MAP0]], #[[$MAP1]]]
//       CHECK: linalg.generic
//  CHECK-SAME:   indexing_maps = [#[[$MAP2]], #[[$MAP3]]]
//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "parallel"]
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[$MAP4]], #[[$MAP5]], #[[$MAP6]]]
36
37// -----
38
// Indexed variant of the one-trip-loop case: the region receives five loop
// indices plus the i32 element, sums all five indices, casts the sum to i32
// and adds the element.
#sum_maps = [
  affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d4)>,
  affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d1, d3, d4)>
]

#sum_attrs = {
  indexing_maps = #sum_maps,
  iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
  library_call = "some_external_func"
}

func @drop_one_trip_loops_indexed_generic(
    %src: tensor<?x1x?xi32>) -> tensor<?x1x?x1x?xi32> {
  %res = linalg.indexed_generic #sum_attrs
      ins(%src : tensor<?x1x?xi32>) {
    ^bb0(%i0: index, %i1: index, %i2: index, %i3: index, %i4: index,
         %elem: i32):
      %s01 = addi %i0, %i1 : index
      %s012 = addi %s01, %i2 : index
      %s0123 = addi %s012, %i3 : index
      %s01234 = addi %s0123, %i4 : index
      %as_i32 = index_cast %s01234 : index to i32
      %total = addi %as_i32, %elem : i32
      linalg.yield %total : i32
  } -> tensor<?x1x?x1x?xi32>
  return %res : tensor<?x1x?x1x?xi32>
}
// Expected output: the region keeps three index arguments plus the element,
// and the index sum is formed from those three only.
// CHECK-LABEL: func @drop_one_trip_loops_indexed_generic
//       CHECK:   linalg.indexed_generic
//       CHECK:   ^{{.+}}(
//  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index
//  CHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: index, %[[ARG4:[a-zA-Z0-9]+]]: i32)
//       CHECK:     %[[T3:.+]] = addi %[[ARG1]], %[[ARG2]]
//       CHECK:     %[[T4:.+]] = addi %[[T3]], %[[ARG3]]
//       CHECK:     %[[T5:.+]] = index_cast %[[T4]] : index to i32
//       CHECK:     %[[T6:.+]] = addi %[[T5]], %[[ARG4]] : i32
//       CHECK:     linalg.yield %[[T6]] : i32
77
78// -----
79
// Identity copy of a 1x1 tensor: both loops iterate over unit extents.
#identity_2d = affine_map<(d0, d1) -> (d0, d1)>
#copy_attrs = {
  indexing_maps = [#identity_2d, #identity_2d],
  iterator_types = ["parallel", "parallel"],
  library_call = "some_external_func"
}

func @drop_all_loops(%src: tensor<1x1xf32>) -> tensor<1x1xf32> {
  %res = linalg.generic #copy_attrs
      ins(%src : tensor<1x1xf32>) {
    ^bb0(%elem: f32):
      linalg.yield %elem : f32
  } -> tensor<1x1xf32>
  return %res : tensor<1x1xf32>
}
// Expected output: a reshape with an empty reassociation list followed by a
// generic op that has no loops left.
//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<() -> ()>
// CHECK-LABEL: func @drop_all_loops
//       CHECK:   linalg.tensor_reshape %{{.*}} []
//       CHECK:   linalg.generic
//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP0]]]
//  CHECK-SAME:     iterator_types = []
103
104// -----
105
// Indexed variant of the all-unit-loops case: the region adds both loop
// indices to the i32 element of a 1x1 tensor.
#identity_2d = affine_map<(d0, d1) -> (d0, d1)>
#sum_attrs = {
  indexing_maps = [#identity_2d, #identity_2d],
  iterator_types = ["parallel", "parallel"],
  library_call = "some_external_func"
}

func @drop_all_loops_indexed_generic(
    %src: tensor<1x1xi32>) -> tensor<1x1xi32> {
  %res = linalg.indexed_generic #sum_attrs
      ins(%src : tensor<1x1xi32>) {
    ^bb0(%i0: index, %i1: index, %elem: i32):
      %idx_sum = addi %i0, %i1 : index
      %as_i32 = index_cast %idx_sum : index to i32
      %total = addi %as_i32, %elem : i32
      linalg.yield %total : i32
  } -> tensor<1x1xi32>
  return %res : tensor<1x1xi32>
}

// Expected output: the region is left with only the element argument, which
// is yielded directly.
// CHECK-LABEL: func @drop_all_loops_indexed_generic
//       CHECK:   linalg.indexed_generic
//       CHECK:   ^{{.+}}(%[[ARG1:.+]]: i32)
//       CHECK:     linalg.yield %[[ARG1]] : i32
132
133// -----
134
// Reads a 1x5 tensor through a map whose leading result is the constant 0 and
// writes a rank-1 result, so the operand's leading unit dimension is never
// indexed by a loop.
#accesses = [
  affine_map<(d0) -> (0, d0)>,
  affine_map<(d0) -> (d0)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"],
  library_call = "some_external_fn"
}

func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> {
  %0 = linalg.generic #trait
    ins(%arg0 : tensor<1x5xf32>) {
  ^bb0(%arg2: f32):     // no predecessors
    linalg.yield %arg2 : f32
  } -> tensor<5xf32>
  return %0 : tensor<5xf32>
}
// Expected output: the operand is reshaped with a single reassociation map
// and the generic op uses the rank-1 identity map for both operand and result.
// $MAP1 must be bound in this case: a $-prefixed variable otherwise keeps the
// value captured by an earlier test case, which is a different map.
//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @leading_dim_1_canonicalization
//       CHECK:   linalg.tensor_reshape %{{.*}} [#[[$MAP0]]]
//       CHECK:   linalg.generic
//  CHECK-SAME:     indexing_maps = [#[[$MAP1]], #[[$MAP1]]]
//  CHECK-SAME:     iterator_types = ["parallel"]
160
161// -----
162
// Adds a 1x5 row and a 5x1 column into a 5x5 result. Both operands come from
// reshapes of rank-1 tensors, and their unit dimensions are indexed with the
// constant 0.
#bcast_maps = [
  affine_map<(d0, d1) -> (0, d1)>,
  affine_map<(d0, d1) -> (d0, 0)>,
  affine_map<(d0, d1) -> (d0, d1)>
]

#bcast_attrs = {
  iterator_types = ["parallel", "parallel"],
  indexing_maps = #bcast_maps,
  library_call = "some_external_fn"
}

func @broadcast_test(%lhs: tensor<5xf32>, %rhs: tensor<5xf32>)
    -> tensor<5x5xf32> {
  %row = linalg.tensor_reshape %lhs [affine_map<(d0, d1) -> (d0, d1)>]
      : tensor<5xf32> into tensor<1x5xf32>
  %col = linalg.tensor_reshape %rhs [affine_map<(d0, d1) -> (d0, d1)>]
      : tensor<5xf32> into tensor<5x1xf32>
  %sum = linalg.generic #bcast_attrs
      ins(%row, %col : tensor<1x5xf32>, tensor<5x1xf32>) {
    ^bb0(%a: f32, %b: f32):
      %ab = addf %a, %b : f32
      linalg.yield %ab : f32
  } -> tensor<5x5xf32>
  return %sum : tensor<5x5xf32>
}
// Expected output: no reshape remains before or after the generic op, and the
// operands are indexed by rank-1 maps.
//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1)>
//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
//   CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @broadcast_test
//   CHECK-NOT:   linalg.tensor_reshape
//       CHECK:   linalg.generic
//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
//   CHECK-NOT:   linalg.tensor_reshape
198
199// -----
200
// Broadcasts the single element of a 1x1 tensor over a dynamically shaped 2-D
// result; the operand is always read at the constant index (0, 0).
#splat_maps = [
  affine_map<(d0, d1) -> (0, 0)>,
  affine_map<(d0, d1) -> (d0, d1)>
]

#splat_attrs = {
  iterator_types = ["parallel", "parallel"],
  indexing_maps = #splat_maps,
  library_call = "some_external_fn"
}

func @broadcast_scalar(%src: tensor<1x1xf32>) -> tensor<?x?xf32> {
  %res = linalg.generic #splat_attrs
      ins(%src : tensor<1x1xf32>) {
    ^bb0(%elem: f32):
      linalg.yield %elem : f32
  } -> tensor<?x?xf32>
  return %res : tensor<?x?xf32>
}
// Expected output: the operand is reshaped to a rank-0 tensor and the generic
// op consumes that reshape through a map with no results.
//   CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> ()>
//   CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @broadcast_scalar
//  CHECK-SAME:   %[[ARG0:.*]]: tensor<1x1xf32>
//       CHECK:   %[[A:.*]] = linalg.tensor_reshape %[[ARG0]] []
//  CHECK-SAME:     tensor<1x1xf32> into tensor<f32>
//       CHECK:   linalg.generic
//  CHECK-SAME:     indexing_maps = [#[[$MAP0]], #[[$MAP1]]]
//  CHECK-SAME:     iterator_types = ["parallel", "parallel"]
//  CHECK-SAME:     %[[A]]
231
232// -----
233
// Expanding 2048 to 1x4x1x512 and then collapsing to 4x512 is expected to
// become one reshape straight from the rank-1 type to the rank-2 type.
//       CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
//       CHECK: func @fold_reshape
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]]]
//  CHECK-SAME:   tensor<2048xf32> into tensor<4x512xf32>
func @fold_reshape(%src: tensor<2048xf32>) -> tensor<4x512xf32> {
  %expanded = linalg.tensor_reshape %src
      [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>]
      : tensor<2048xf32> into tensor<1x4x1x512xf32>
  %collapsed = linalg.tensor_reshape %expanded
      [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
       affine_map<(d0, d1, d2, d3) -> (d3)>]
      : tensor<1x4x1x512xf32> into tensor<4x512xf32>
  return %collapsed : tensor<4x512xf32>
}
249
250// -----
251
// Expanding 4x512 to 1x4x1x512 and then collapsing to 2048 is expected to
// become one reshape straight from the rank-2 type to the rank-1 type.
//       CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
//       CHECK: func @fold_reshape
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]]]
//  CHECK-SAME:   tensor<4x512xf32> into tensor<2048xf32>
func @fold_reshape(%src: tensor<4x512xf32>) -> tensor<2048xf32> {
  %expanded = linalg.tensor_reshape %src
      [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
       affine_map<(d0, d1, d2, d3) -> (d3)>]
      : tensor<4x512xf32> into tensor<1x4x1x512xf32>
  %collapsed = linalg.tensor_reshape %expanded
      [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>]
      : tensor<1x4x1x512xf32> into tensor<2048xf32>
  return %collapsed : tensor<2048xf32>
}
267
268// -----
269
// Same two-step reshape chain, but the source already has a trailing unit
// dimension that is present in the final type as well.
//   CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
//   CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2)>
//       CHECK: func @fold_reshape
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]], #[[MAP1]]]
//  CHECK-SAME:   tensor<2048x1xf32> into tensor<4x512x1xf32>
func @fold_reshape(%src: tensor<2048x1xf32>) -> tensor<4x512x1xf32> {
  %expanded = linalg.tensor_reshape %src
      [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>,
       affine_map<(d0, d1, d2, d3, d4) -> (d4)>]
      : tensor<2048x1xf32> into tensor<1x4x1x512x1xf32>
  %collapsed = linalg.tensor_reshape %expanded
      [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
       affine_map<(d0, d1, d2, d3, d4) -> (d3)>,
       affine_map<(d0, d1, d2, d3, d4) -> (d4)>]
      : tensor<1x4x1x512x1xf32> into tensor<4x512x1xf32>
  return %collapsed : tensor<4x512x1xf32>
}
288
289// -----
290
// Rank-3 source expanded to rank 9 and collapsed back to rank 5; the expected
// output is one reshape with three reassociation groups.
//   CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1)>
//   CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d2)>
//   CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d3, d4)>
//       CHECK: func @fold_reshape
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
//  CHECK-SAME:   tensor<2048x1x2048xf32> into tensor<4x512x1x512x4xf32>
func @fold_reshape(%src: tensor<2048x1x2048xf32>)
    -> tensor<4x512x1x512x4xf32> {
  %expanded = linalg.tensor_reshape %src
      [affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d4)>,
       affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d5)>,
       affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d6, d7, d8)>]
      : tensor<2048x1x2048xf32> into tensor<1x4x1x512x1x1x512x1x4xf32>
  %collapsed = linalg.tensor_reshape %expanded
      [affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2)>,
       affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d3, d4)>,
       affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d5)>,
       affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d6, d7)>,
       affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d8)>]
      : tensor<1x4x1x512x1x1x512x1x4xf32> into tensor<4x512x1x512x4xf32>
  return %collapsed : tensor<4x512x1x512x4xf32>
}
313
314// -----
315
// Expanding 2 to 2x1x1 and then collapsing the two unit dimensions is expected
// to become one reshape from tensor<2xf32> to tensor<2x1xf32>. The
// reassociation list is matched including its closing bracket so that a list
// with additional maps is rejected, as in the other fold_reshape cases.
//   CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
//       CHECK: func @fold_reshape
//       CHECK: linalg.tensor_reshape %{{.*}} [#[[MAP0]]]
//  CHECK-SAME:   tensor<2xf32> into tensor<2x1xf32>
func @fold_reshape(%arg0: tensor<2xf32>) -> tensor<2x1xf32>
{
  %0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1, d2) -> (d0, d1, d2)>] : tensor<2xf32> into tensor<2x1x1xf32>
  %1 = linalg.tensor_reshape %0
  [affine_map<(d0, d1, d2) -> (d0)>,
   affine_map<(d0, d1, d2) -> (d1, d2)>
  ] : tensor<2x1x1xf32> into tensor<2x1xf32>
  return %1 : tensor<2x1xf32>
}
329