// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=4,8" | FileCheck %s -check-prefix=VECT
// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=32,256 test-fastest-varying=1,0" | FileCheck %s

// Permutation maps used in vectorization.
// CHECK-DAG: #[[$map_id1:map[0-9]+]] = affine_map<(d0) -> (d0)>
// CHECK-DAG: #[[$map_proj_d0d1_zerod1:map[0-9]+]] = affine_map<(d0, d1) -> (0, d1)>
// CHECK-DAG: #[[$map_proj_d0d1_d0zero:map[0-9]+]] = affine_map<(d0, d1) -> (d0, 0)>
// VECT-DAG: #[[$map_id1:map[0-9]+]] = affine_map<(d0) -> (d0)>
// VECT-DAG: #[[$map_proj_d0d1_zerod1:map[0-9]+]] = affine_map<(d0, d1) -> (0, d1)>
// VECT-DAG: #[[$map_proj_d0d1_d0zero:map[0-9]+]] = affine_map<(d0, d1) -> (d0, 0)>

func @vec2d(%A : memref<?x?x?xf32>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %M = dim %A, %c0 : memref<?x?x?xf32>
  %N = dim %A, %c1 : memref<?x?x?xf32>
  %P = dim %A, %c2 : memref<?x?x?xf32>
  // CHECK: for {{.*}} = 0 to %{{.*}} {
  // CHECK:   for {{.*}} = 0 to %{{.*}} step 32
  // CHECK:     for {{.*}} = 0 to %{{.*}} step 256
  // Example:
  // affine.for %{{.*}} = 0 to %{{.*}} {
  //   affine.for %{{.*}} = 0 to %{{.*}} step 32 {
  //     affine.for %{{.*}} = 0 to %{{.*}} step 256 {
  //       %{{.*}} = "vector.transfer_read"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
  affine.for %i0 = 0 to %M {
    affine.for %i1 = 0 to %N {
      affine.for %i2 = 0 to %P {
        %a2 = affine.load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
  }
  // CHECK: for {{.*}} = 0 to %{{.*}} {
  // CHECK:   for {{.*}} = 0 to %{{.*}} {
  // CHECK:     for {{.*}} = 0 to %{{.*}} {
  // For the case test-fastest-varying=1,0, no vectorization happens because of
  // the loop nesting order.
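  // The fastest-varying memref dimension is indexed by the outermost loop %i3
  // here, so the requested (1, 0) pattern does not match this nest.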
  affine.for %i3 = 0 to %M {
    affine.for %i4 = 0 to %N {
      affine.for %i5 = 0 to %P {
        %a5 = affine.load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
  }
  return
}

func @vector_add_2d(%M : index, %N : index) -> f32 {
  %A = alloc (%M, %N) : memref<?x?xf32, 0>
  %B = alloc (%M, %N) : memref<?x?xf32, 0>
  %C = alloc (%M, %N) : memref<?x?xf32, 0>
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  affine.for %i0 = 0 to %M {
    affine.for %i1 = 0 to %N {
      // CHECK: [[C1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
      // CHECK: vector.transfer_write [[C1]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>
      // non-scoped %f1
      affine.store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  affine.for %i2 = 0 to %M {
    affine.for %i3 = 0 to %N {
      // CHECK: [[C3:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
      // CHECK: vector.transfer_write [[C3]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>
      // non-scoped %f2
      affine.store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  affine.for %i4 = 0 to %M {
    affine.for %i5 = 0 to %N {
      // CHECK: [[A5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
      // CHECK: [[B5:%.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} : memref<?x?xf32>, vector<32x256xf32>
      // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
      // CHECK: [[SPLAT1:%.*]] = constant dense<1.000000e+00> : vector<32x256xf32>
      // CHECK: [[S6:%.*]] = addf [[S5]], [[SPLAT1]] : vector<32x256xf32>
      // CHECK: [[SPLAT2:%.*]] = constant dense<2.000000e+00> : vector<32x256xf32>
      // CHECK: [[S7:%.*]] = addf [[S5]], [[SPLAT2]] : vector<32x256xf32>
      // CHECK: [[S8:%.*]] = addf [[S7]], [[S6]] : vector<32x256xf32>
      // CHECK: vector.transfer_write [[S8]], {{.*}} : vector<32x256xf32>, memref<?x?xf32>
      //
      %a5 = affine.load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = affine.load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
      // non-scoped %f1
      %s6 = addf %s5, %f1 : f32
      // non-scoped %f2
      %s7 = addf %s5, %f2 : f32
      // diamond dependency.
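      // %s5 feeds both %s6 and %s7, which are recombined below into %s8.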
      %s8 = addf %s7, %s6 : f32
      affine.store %s8, %C[%i4, %i5] : memref<?x?xf32, 0>
    }
  }
  %c7 = constant 7 : index
  %c42 = constant 42 : index
  %res = affine.load %C[%c7, %c42] : memref<?x?xf32, 0>
  return %res : f32
}

// VECT-LABEL: func @vectorize_matmul
func @vectorize_matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %M = dim %arg0, %c0 : memref<?x?xf32>
  %K = dim %arg0, %c1 : memref<?x?xf32>
  %N = dim %arg2, %c1 : memref<?x?xf32>
  // VECT: %[[C0:.*]] = constant 0 : index
  // VECT-NEXT: %[[C1:.*]] = constant 1 : index
  // VECT-NEXT: %[[M:.*]] = dim %{{.*}}, %[[C0]] : memref<?x?xf32>
  // VECT-NEXT: %[[K:.*]] = dim %{{.*}}, %[[C1]] : memref<?x?xf32>
  // VECT-NEXT: %[[N:.*]] = dim %{{.*}}, %[[C1]] : memref<?x?xf32>
  // VECT: {{.*}} #[[$map_id1]](%[[M]]) step 4 {
  // VECT-NEXT: {{.*}} #[[$map_id1]](%[[N]]) step 8 {
  // VECT: %[[VC0:.*]] = constant dense<0.000000e+00> : vector<4x8xf32>
  // VECT-NEXT: vector.transfer_write %[[VC0]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x8xf32>, memref<?x?xf32>
  affine.for %i0 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) {
    affine.for %i1 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) {
      %cst = constant 0.000000e+00 : f32
      affine.store %cst, %arg2[%i0, %i1] : memref<?x?xf32>
    }
  }
  // VECT: affine.for %[[I2:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[M]]) step 4 {
  // VECT-NEXT: affine.for %[[I3:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[N]]) step 8 {
  // VECT-NEXT: affine.for %[[I4:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[K]]) {
  // VECT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[$map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
  // VECT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[$map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
  // VECT-NEXT: %[[C:.*]] = mulf %[[B]], %[[A]] : vector<4x8xf32>
  // VECT: %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} : memref<?x?xf32>, vector<4x8xf32>
  // VECT-NEXT: %[[E:.*]] = addf %[[D]], %[[C]] : vector<4x8xf32>
  // VECT: vector.transfer_write %[[E]], %{{.*}}[%[[I2]], %[[I3]]] : vector<4x8xf32>, memref<?x?xf32>
  affine.for %i2 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%M) {
    affine.for %i3 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%N) {
      affine.for %i4 = affine_map<(d0) -> (d0)>(%c0) to affine_map<(d0) -> (d0)>(%K) {
        %6 = affine.load %arg1[%i4, %i3] : memref<?x?xf32>
        %7 = affine.load %arg0[%i2, %i4] : memref<?x?xf32>
        %8 = mulf %7, %6 : f32
        %9 = affine.load %arg2[%i2, %i3] : memref<?x?xf32>
        %10 = addf %9, %8 : f32
        affine.store %10, %arg2[%i2, %i3] : memref<?x?xf32>
      }
    }
  }
  return
}