1// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=4,8" | FileCheck %s -check-prefix=VECT
2// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=32,256 test-fastest-varying=1,0" | FileCheck %s
3
4// Permutation maps used in vectorization.
5// CHECK-DAG: #[[$map_id1:map[0-9]+]] = affine_map<(d0) -> (d0)>
6// CHECK-DAG: #[[$map_proj_d0d1_zerod1:map[0-9]+]] = affine_map<(d0, d1) -> (0, d1)>
7// CHECK-DAG: #[[$map_proj_d0d1_d0zero:map[0-9]+]] = affine_map<(d0, d1) -> (d0, 0)>
8// VECT-DAG: #[[$map_id1:map[0-9]+]] = affine_map<(d0) -> (d0)>
9// VECT-DAG: #[[$map_proj_d0d1_zerod1:map[0-9]+]] = affine_map<(d0, d1) -> (0, d1)>
10// VECT-DAG: #[[$map_proj_d0d1_d0zero:map[0-9]+]] = affine_map<(d0, d1) -> (d0, 0)>
11
// Two 3-deep affine loop nests that load from the same 3-D memref with
// different index orders. The directives in this function use the default
// prefix, so they are only exercised by the second RUN line
// (virtual-vector-size=32,256 with test-fastest-varying=1,0).
//
// The label below anchors the following directives to this function so they
// cannot accidentally match loops printed for another function.
// CHECK-LABEL: func @vec2d
12func @vec2d(%A : memref<?x?x?xf32>) {
13   %c0 = constant 0 : index
14   %c1 = constant 1 : index
15   %c2 = constant 2 : index
16   %M = dim %A, %c0 : memref<?x?x?xf32>
17   %N = dim %A, %c1 : memref<?x?x?xf32>
18   %P = dim %A, %c2 : memref<?x?x?xf32>
   // First nest: the load is indexed [%i0, %i1, %i2], i.e. in loop order.
   // Expected: the outer loop keeps its unit step while the two inner loops
   // are stepped by 32 and 256 respectively.
19   // CHECK: for  {{.*}} = 0 to %{{.*}} {
20   // CHECK:   for {{.*}} = 0 to %{{.*}} step 32
21   // CHECK:     for {{.*}} = 0 to %{{.*}} step 256
22   // Example (illustrative shape of the expected output):
23   // affine.for %{{.*}} = 0 to %{{.*}} {
24   //   affine.for %{{.*}} = 0 to %{{.*}} step 32 {
25   //     affine.for %{{.*}} = 0 to %{{.*}} step 256 {
26   //       %{{.*}} = "vector.transfer_read"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
27   affine.for %i0 = 0 to %M {
28     affine.for %i1 = 0 to %N {
29       affine.for %i2 = 0 to %P {
30         %a2 = affine.load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
31       }
32     }
33   }
   // Second nest: the load is indexed [%i4, %i5, %i3], so the outermost loop
   // variable indexes the last memref dimension. Expected: all three loops
   // are left with their original unit step.
34   // CHECK: for  {{.*}} = 0 to %{{.*}} {
35   // CHECK:   for  {{.*}} = 0 to %{{.*}} {
36   // CHECK:     for  {{.*}} = 0 to %{{.*}} {
37   // With test-fastest-varying=1,0 no vectorization happens for this nest
38   // because of the loop nesting order.
39   affine.for %i3 = 0 to %M {
40     affine.for %i4 = 0 to %N {
41       affine.for %i5 = 0 to %P {
42         %a5 = affine.load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
43       }
44     }
45   }
46   return
47}
48
// Element-wise 2-D kernel: fills %A with 1.0 and %B with 2.0, then stores
// ((A + B) + 2.0) + ((A + B) + 1.0) into %C and returns %C[7, 42].
// The directives in this function use the default prefix, so they are only
// exercised by the second RUN line (virtual-vector-size=32,256).
//
// The label below anchors the following directives to this function; the
// patterns (splat constants, transfer_write, addf) are generic enough that
// they could otherwise match output belonging to a different function.
// CHECK-LABEL: func @vector_add_2d
49func @vector_add_2d(%M : index, %N : index) -> f32 {
50  %A = alloc (%M, %N) : memref<?x?xf32, 0>
51  %B = alloc (%M, %N) : memref<?x?xf32, 0>
52  %C = alloc (%M, %N) : memref<?x?xf32, 0>
53  %f1 = constant 1.0 : f32
54  %f2 = constant 2.0 : f32
55  affine.for %i0 = 0 to %M {
56    affine.for %i1 = 0 to %N {
57      // CHECK: [[C1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
58      // CHECK: vector.transfer_write [[C1]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>
59      // non-scoped %f1: the stored scalar is defined outside the loop nest,
      // and the expected output uses a vector splat constant in its place.
60      affine.store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
61    }
62  }
63  affine.for %i2 = 0 to %M {
64    affine.for %i3 = 0 to %N {
65      // CHECK: [[C3:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
66      // CHECK: vector.transfer_write [[C3]], {{.*}}  : vector<32x256xf32>, memref<?x?xf32>
67      // non-scoped %f2: same situation as above, with the 2.0 constant.
68      affine.store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
69    }
70  }
71  affine.for %i4 = 0 to %M {
72    affine.for %i5 = 0 to %N {
73      // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
74      // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
75      // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
76      // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
77      // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32>
78      // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
79      // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<32x256xf32>
80      // CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<32x256xf32>
81      // CHECK: vector.transfer_write [[S8]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>
82      //
83      %a5 = affine.load %A[%i4, %i5] : memref<?x?xf32, 0>
84      %b5 = affine.load %B[%i4, %i5] : memref<?x?xf32, 0>
85      %s5 = addf %a5, %b5 : f32
86      // non-scoped %f1
87      %s6 = addf %s5, %f1 : f32
88      // non-scoped %f2
89      %s7 = addf %s5, %f2 : f32
90      // diamond dependency: %s5 feeds both %s6 and %s7, which rejoin in %s8.
91      %s8 = addf %s7, %s6 : f32
92      affine.store %s8, %C[%i4, %i5] : memref<?x?xf32, 0>
93    }
94  }
  // Scalar read-back of one element of %C at constant indices, outside any
  // loop; no directive in this file constrains how it is printed.
95  %c7 = constant 7 : index
96  %c42 = constant 42 : index
97  %res = affine.load %C[%c7, %c42] : memref<?x?xf32, 0>
98  return %res : f32
99}
100
101// VECT-LABEL: func @vectorize_matmul
// Matmul-style kernel over three 2-D memrefs. The first loop nest stores 0.0
// into every %arg2[%i0, %i1]; the second nest accumulates
// %arg0[%i2, %i4] * %arg1[%i4, %i3] into %arg2[%i2, %i3].
// The directives in this function are only exercised by the first RUN line
// (virtual-vector-size=4,8 with -check-prefix=VECT).
102func @vectorize_matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
103  %c0 = constant 0 : index
104  %c1 = constant 1 : index
105  %M = dim %arg0, %c0 : memref<?x?xf32>
106  %K = dim %arg0, %c1 : memref<?x?xf32>
107  %N = dim %arg2, %c1 : memref<?x?xf32>
  // Expected prologue: the index constants and the three dim ops are printed
  // back-to-back, followed by the initialization nest with its two loops
  // stepped by 4 and 8 and a 4x8 zero vector written to the output memref.
108  //      VECT: %[[C0:.*]] = constant 0 : index
109  // VECT-NEXT: %[[C1:.*]] = constant 1 : index
110  // VECT-NEXT: %[[M:.*]] = dim %{{.*}}, %[[C0]] : memref<?x?xf32>
111  // VECT-NEXT: %[[K:.*]] = dim %{{.*}}, %[[C1]] : memref<?x?xf32>
112  // VECT-NEXT: %[[N:.*]] = dim %{{.*}}, %[[C1]] : memref<?x?xf32>
113  //      VECT: {{.*}} #[[$map_id1]](%[[M]]) step 4 {
114  // VECT-NEXT:   {{.*}} #[[$map_id1]](%[[N]]) step 8 {
115  //      VECT:     %[[VC0:.*]] = constant dense<0.000000e+00> : vector<4x8xf32>
116  // VECT-NEXT:     vector.transfer_write %[[VC0]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x8xf32>, memref<?x?xf32>
117  affine.for %i0 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) {
118    affine.for %i1 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) {
119      %cst = constant 0.000000e+00 : f32
120      affine.store %cst, %arg2[%i0, %i1] : memref<?x?xf32>
121    }
122  }
  // Expected accumulation nest: the loops bounded by M and N are stepped by 4
  // and 8, while the loop bounded by K keeps its original step. The two reads
  // that are indexed by the K loop variable carry projection permutation maps
  // with a constant-0 result; the read and write of the accumulator indexed by
  // [%i2, %i3] carry no explicit permutation map.
123  //      VECT:  affine.for %[[I2:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[M]]) step 4 {
124  // VECT-NEXT:    affine.for %[[I3:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[N]]) step 8 {
125  // VECT-NEXT:      affine.for %[[I4:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[K]]) {
126  //      VECT:        %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[$map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
127  //      VECT:        %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[$map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
128  // VECT-NEXT:        %[[C:.*]] = mulf %[[B]], %[[A]] : vector<4x8xf32>
129  //      VECT:        %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} : memref<?x?xf32>, vector<4x8xf32>
130  // VECT-NEXT:        %[[E:.*]] = addf %[[D]], %[[C]] : vector<4x8xf32>
131  //      VECT:        vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] : vector<4x8xf32>, memref<?x?xf32>
132  affine.for %i2 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) {
133    affine.for %i3 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) {
134      affine.for %i4 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%K) {
135        %6 = affine.load %arg1[%i4, %i3] : memref<?x?xf32>
136        %7 = affine.load %arg0[%i2, %i4] : memref<?x?xf32>
137        %8 = mulf %7, %6 : f32
138        %9 = affine.load %arg2[%i2, %i3] : memref<?x?xf32>
139        %10 = addf %9, %8 : f32
140        affine.store %10, %arg2[%i2, %i3] : memref<?x?xf32>
141      }
142    }
143  }
144  return
145}
146