// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s
2
// Element-wise f32 addition over statically shaped 2x2 memrefs. Both iterator
// dimensions are parallel, so the op is expected to lower to a single
// two-dimensional scf.parallel whose body loads both inputs, adds them and
// stores the result into the output buffer.
#map0 = affine_map<(d0, d1) -> (d0, d1)>
func @linalg_generic_sum(%lhs: memref<2x2xf32>,
                         %rhs: memref<2x2xf32>,
                         %sum: memref<2x2xf32>) {
  linalg.generic {
    indexing_maps = [#map0, #map0, #map0],
    iterator_types = ["parallel", "parallel"]}
      ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
     outs(%sum : memref<2x2xf32>) {
    ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):   // no predecessors
      %0 = addf %lhs_in, %rhs_in : f32
      linalg.yield %0 : f32
  }
  return
}
// LHS, RHS and SUM bind the three function arguments. The addf result gets its
// own capture (SUM_ELEM) so that SUM keeps referring to the output memref and
// the store destination can be verified against it.
// CHECK-LABEL: @linalg_generic_sum
// CHECK:   (%[[LHS:.*]]:{{.*}}, %[[RHS:.*]]:{{.*}}, %[[SUM:.*]]:{{.*}})
// CHECK-DAG: %[[C2:.*]] = constant 2
// CHECK-DAG: %[[C0:.*]] = constant 0
// CHECK-DAG: %[[C1:.*]] = constant 1
// CHECK: scf.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
// CHECK:   %[[LHS_ELEM:.*]] = load %[[LHS]][%[[I]], %[[J]]]
// CHECK:   %[[RHS_ELEM:.*]] = load %[[RHS]][%[[I]], %[[J]]]
// CHECK:   %[[SUM_ELEM:.*]] = addf %[[LHS_ELEM]], %[[RHS_ELEM]] : f32
// CHECK:   store %[[SUM_ELEM]], %[[SUM]][%[[I]], %[[J]]]
// CHECK:   scf.yield
29
// -----
31
// Copy with iterator types (parallel, parallel, reduction, parallel). The two
// leading parallel dimensions are expected to share one scf.parallel, the
// reduction dimension becomes an scf.for, and the trailing parallel dimension
// gets its own nested scf.parallel.
#accesses = [
  affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
  affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
]
#trait = {
  iterator_types = ["parallel", "parallel", "reduction", "parallel"],
  indexing_maps = #accesses
}

func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {
  linalg.generic #trait
      ins(%A : memref<?x?x?x?xf32>)
     outs(%B : memref<?x?x?xf32>) {
    ^bb0(%a: f32, %b: f32):
      linalg.yield %a: f32
  }
  return
}
// Index constants are captured rather than matched by their printed SSA names,
// so the test does not depend on how the printer names values. The load and
// store are tied to the function arguments and to each other.
// CHECK-LABEL: @lower_outer_parallel
//  CHECK-SAME: (%[[A:.*]]: memref<?x?x?x?xf32>, %[[B:.*]]: memref<?x?x?xf32>)
//   CHECK-DAG: %[[C0:.*]] = constant 0
//   CHECK-DAG: %[[C1:.*]] = constant 1
//   CHECK-DAG: %[[C2:.*]] = constant 2
//   CHECK-DAG: %[[C3:.*]] = constant 3
//   CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, %[[C0]]
//   CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, %[[C1]]
//   CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, %[[C2]]
//   CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, %[[C3]]
//       CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
//       CHECK:   scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
//       CHECK:     scf.parallel (%[[IV3:.*]]) = (%[[C0]]) to (%[[D3]]) step (%[[C1]])
//       CHECK:       %[[VAL:.*]] = load %[[A]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
//       CHECK:       store %[[VAL]], %[[B]][%[[IV0]], %[[IV1]], %[[IV3]]]
62
// -----
64
// Copy with iterator types (parallel, parallel, reduction, parallel, parallel,
// reduction). Each maximal run of parallel dimensions is expected to become
// one scf.parallel and each reduction dimension an scf.for, nested in
// iterator order.
#accesses = [
  affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>,
  affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d4, d5)>
]
#trait = {
  iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"],
  indexing_maps = #accesses
}

func @lower_mixed_parallel(%A: memref<?x?x?x?x?x?xf32>, %B: memref<?x?x?x?xf32>) {
  linalg.generic #trait
      ins(%A : memref<?x?x?x?x?x?xf32>)
     outs(%B : memref<?x?x?x?xf32>) {
    ^bb0(%a: f32, %b: f32):
      linalg.yield %a: f32
  }
  return
}
// Index constants are captured rather than matched by their printed SSA names,
// so the test does not depend on how the printer names values. The load and
// store are tied to the function arguments and to each other, and are
// indented as the body of the innermost loop.
// CHECK-LABEL: @lower_mixed_parallel
//  CHECK-SAME: (%[[A:.*]]: memref<?x?x?x?x?x?xf32>, %[[B:.*]]: memref<?x?x?x?xf32>)
//   CHECK-DAG: %[[C0:.*]] = constant 0
//   CHECK-DAG: %[[C1:.*]] = constant 1
//   CHECK-DAG: %[[C2:.*]] = constant 2
//   CHECK-DAG: %[[C3:.*]] = constant 3
//   CHECK-DAG: %[[C4:.*]] = constant 4
//   CHECK-DAG: %[[C5:.*]] = constant 5
//   CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, %[[C0]]
//   CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, %[[C1]]
//   CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, %[[C2]]
//   CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, %[[C3]]
//   CHECK-DAG: %[[D4:.*]] = dim %{{.*}}, %[[C4]]
//   CHECK-DAG: %[[D5:.*]] = dim %{{.*}}, %[[C5]]
//       CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
//       CHECK:   scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
//       CHECK:     scf.parallel (%[[IV3:.*]], %[[IV4:.*]]) = (%[[C0]], %[[C0]]) to (%[[D3]], %[[D4]]) step (%[[C1]], %[[C1]])
//       CHECK:       scf.for %[[IV5:.*]] = %[[C0]] to %[[D5]] step %[[C1]]
//       CHECK:         %[[VAL:.*]] = load %[[A]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], %[[IV4]], %[[IV5]]]
//       CHECK:         store %[[VAL]], %[[B]][%[[IV0]], %[[IV2]], %[[IV4]], %[[IV5]]]
98