// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Illustrates an 8x8 Sparse Matrix x Vector implemented with only operations
// of the vector dialect (and some std/scf). Essentially, this example performs
// the following multiplication:
//
//     0  1  2  3  4  5  6  7
//   +------------------------+
// 0 | 1  0  2  0  0  1  0  1 |   | 1 |   | 21 |
// 1 | 1  8  0  0  3  0  1  0 |   | 2 |   | 39 |
// 2 | 0  0  1  0  0  2  6  2 |   | 3 |   | 73 |
// 3 | 0  3  0  1  0  1  0  1 | x | 4 | = | 24 |
// 4 | 5  0  0  1  1  1  0  0 |   | 5 |   | 20 |
// 5 | 0  3  0  0  2  1  2  0 |   | 6 |   | 36 |
// 6 | 4  0  7  0  1  0  1  0 |   | 7 |   | 37 |
// 7 | 0  3  0  2  0  0  1  1 |   | 8 |   | 29 |
//   +------------------------+
//
// The sparse storage scheme used is an extended column scheme (also referred
// to as jagged diagonal), which is essentially a vector-friendly variant of
// the general sparse row-wise scheme (also called compressed row storage),
// using fixed length vectors and no explicit pointer indexing into the
// value array to find the rows.
//
// The extended column storage for the matrix shown above is as follows.
//
//      VALUE           INDEX
//   +---------+     +---------+
// 0 | 1 2 1 1 |     | 0 2 5 7 |
// 1 | 1 8 3 1 |     | 0 1 4 6 |
// 2 | 1 2 6 2 |     | 2 5 6 7 |
// 3 | 3 1 1 1 |     | 1 3 5 7 |
// 4 | 5 1 1 1 |     | 0 3 4 5 |
// 5 | 3 2 1 2 |     | 1 4 5 6 |
// 6 | 4 7 1 1 |     | 0 2 4 6 |
// 7 | 3 2 1 1 |     | 1 3 6 7 |
//   +---------+     +---------+
//
// This example illustrates an effective SAXPY version that operates
// on the transposed jagged diagonal storage to obtain higher vector
// lengths. Another example in this directory illustrates a DOT
// version of the operation.
//
// Accumulates A * x into B (i.e. B += A * x) using the transposed jagged
// diagonal storage: AVAL[k] holds the k-th stored value of every matrix row,
// AIDX[k] the matching column indices. Each loop iteration gathers x at those
// indices and multiply-accumulates across all 8 rows at once.
//
//   %AVAL: stored values, one vector<8xf32> per diagonal (4 diagonals).
//   %AIDX: column indices, one vector<8xi32> per diagonal.
//   %X:    dense input vector (length 8 at runtime).
//   %B:    accumulator; read on entry, result stored back on exit.
//
func @spmv8x8(%AVAL: memref<4xvector<8xf32>>,
              %AIDX: memref<4xvector<8xi32>>,
              %X: memref<?xf32>, %B: memref<1xvector<8xf32>>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %cn = constant 4 : index
  // All 8 lanes are active; the fixed-length jagged diagonal format pads
  // short rows, so no partial masking is needed.
  %mask = vector.constant_mask [8] : vector<8xi1>
  %b = load %B[%c0] : memref<1xvector<8xf32>>
  // SAXPY-style accumulation: one fused multiply-add per stored diagonal.
  %b_out = scf.for %k = %c0 to %cn step %c1 iter_args(%b_iter = %b) -> (vector<8xf32>) {
    %aval = load %AVAL[%k] : memref<4xvector<8xf32>>
    %aidx = load %AIDX[%k] : memref<4xvector<8xi32>>
    %0 = vector.gather %X, %aidx, %mask : (memref<?xf32>, vector<8xi32>, vector<8xi1>) -> vector<8xf32>
    %b_new = vector.fma %aval, %0, %b_iter : vector<8xf32>
    scf.yield %b_new : vector<8xf32>
  }
  store %b_out, %B[%c0] : memref<1xvector<8xf32>>
  return
}

// Test driver: builds the sparse storage for the matrix in the file header,
// initializes x = (1, ..., 8) and B = 0, runs the kernel, and prints the
// storage plus the result for FileCheck verification.
func @entry() {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %c3 = constant 3 : index
  %c4 = constant 4 : index
  %c5 = constant 5 : index
  %c6 = constant 6 : index
  %c7 = constant 7 : index
  %c8 = constant 8 : index

  %f0 = constant 0.0 : f32
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  %f3 = constant 3.0 : f32
  %f4 = constant 4.0 : f32
  %f5 = constant 5.0 : f32
  %f6 = constant 6.0 : f32
  %f7 = constant 7.0 : f32
  %f8 = constant 8.0 : f32

  %i0 = constant 0 : i32
  %i1 = constant 1 : i32
  %i2 = constant 2 : i32
  %i3 = constant 3 : i32
  %i4 = constant 4 : i32
  %i5 = constant 5 : i32
  %i6 = constant 6 : i32
  %i7 = constant 7 : i32

  //
  // Allocate.
  //

  %AVAL = alloc() {alignment = 64} : memref<4xvector<8xf32>>
  %AIDX = alloc() {alignment = 64} : memref<4xvector<8xi32>>
  %X    = alloc(%c8) {alignment = 64} : memref<?xf32>
  %B    = alloc() {alignment = 64} : memref<1xvector<8xf32>>

  //
  // Initialize.
  //
  // Each AVAL[k] / AIDX[k] below is a COLUMN of the VALUE / INDEX tables in
  // the file header (the storage is transposed). Vectors start from a
  // broadcast of the most common element and only the deviating lanes are
  // inserted.
  //

  %vf1 = vector.broadcast %f1 : f32 to vector<8xf32>

  // AVAL[0] = ( 1, 1, 1, 3, 5, 3, 4, 3 )
  %0 = vector.insert %f3, %vf1[3] : f32 into vector<8xf32>
  %1 = vector.insert %f5, %0[4] : f32 into vector<8xf32>
  %2 = vector.insert %f3, %1[5] : f32 into vector<8xf32>
  %3 = vector.insert %f4, %2[6] : f32 into vector<8xf32>
  %4 = vector.insert %f3, %3[7] : f32 into vector<8xf32>
  store %4, %AVAL[%c0] : memref<4xvector<8xf32>>

  // AVAL[1] = ( 2, 8, 2, 1, 1, 2, 7, 2 )
  %5 = vector.insert %f2, %vf1[0] : f32 into vector<8xf32>
  %6 = vector.insert %f8, %5[1] : f32 into vector<8xf32>
  %7 = vector.insert %f2, %6[2] : f32 into vector<8xf32>
  %8 = vector.insert %f2, %7[5] : f32 into vector<8xf32>
  %9 = vector.insert %f7, %8[6] : f32 into vector<8xf32>
  %10 = vector.insert %f2, %9[7] : f32 into vector<8xf32>
  store %10, %AVAL[%c1] : memref<4xvector<8xf32>>

  // AVAL[2] = ( 1, 3, 6, 1, 1, 1, 1, 1 )
  %11 = vector.insert %f3, %vf1[1] : f32 into vector<8xf32>
  %12 = vector.insert %f6, %11[2] : f32 into vector<8xf32>
  store %12, %AVAL[%c2] : memref<4xvector<8xf32>>

  // AVAL[3] = ( 1, 1, 2, 1, 1, 2, 1, 1 )
  %13 = vector.insert %f2, %vf1[2] : f32 into vector<8xf32>
  %14 = vector.insert %f2, %13[5] : f32 into vector<8xf32>
  store %14, %AVAL[%c3] : memref<4xvector<8xf32>>

  %vi0 = vector.broadcast %i0 : i32 to vector<8xi32>

  // AIDX[0] = ( 0, 0, 2, 1, 0, 1, 0, 1 )
  %20 = vector.insert %i2, %vi0[2] : i32 into vector<8xi32>
  %21 = vector.insert %i1, %20[3] : i32 into vector<8xi32>
  %22 = vector.insert %i1, %21[5] : i32 into vector<8xi32>
  %23 = vector.insert %i1, %22[7] : i32 into vector<8xi32>
  store %23, %AIDX[%c0] : memref<4xvector<8xi32>>

  // AIDX[1] = ( 2, 1, 5, 3, 3, 4, 2, 3 )
  %24 = vector.insert %i2, %vi0[0] : i32 into vector<8xi32>
  %25 = vector.insert %i1, %24[1] : i32 into vector<8xi32>
  %26 = vector.insert %i5, %25[2] : i32 into vector<8xi32>
  %27 = vector.insert %i3, %26[3] : i32 into vector<8xi32>
  %28 = vector.insert %i3, %27[4] : i32 into vector<8xi32>
  %29 = vector.insert %i4, %28[5] : i32 into vector<8xi32>
  %30 = vector.insert %i2, %29[6] : i32 into vector<8xi32>
  %31 = vector.insert %i3, %30[7] : i32 into vector<8xi32>
  store %31, %AIDX[%c1] : memref<4xvector<8xi32>>

  // AIDX[2] = ( 5, 4, 6, 5, 4, 5, 4, 6 )
  %32 = vector.insert %i5, %vi0[0] : i32 into vector<8xi32>
  %33 = vector.insert %i4, %32[1] : i32 into vector<8xi32>
  %34 = vector.insert %i6, %33[2] : i32 into vector<8xi32>
  %35 = vector.insert %i5, %34[3] : i32 into vector<8xi32>
  %36 = vector.insert %i4, %35[4] : i32 into vector<8xi32>
  %37 = vector.insert %i5, %36[5] : i32 into vector<8xi32>
  %38 = vector.insert %i4, %37[6] : i32 into vector<8xi32>
  %39 = vector.insert %i6, %38[7] : i32 into vector<8xi32>
  store %39, %AIDX[%c2] : memref<4xvector<8xi32>>

  // AIDX[3] = ( 7, 6, 7, 7, 5, 6, 6, 7 )
  %40 = vector.insert %i7, %vi0[0] : i32 into vector<8xi32>
  %41 = vector.insert %i6, %40[1] : i32 into vector<8xi32>
  %42 = vector.insert %i7, %41[2] : i32 into vector<8xi32>
  %43 = vector.insert %i7, %42[3] : i32 into vector<8xi32>
  %44 = vector.insert %i5, %43[4] : i32 into vector<8xi32>
  %45 = vector.insert %i6, %44[5] : i32 into vector<8xi32>
  %46 = vector.insert %i6, %45[6] : i32 into vector<8xi32>
  %47 = vector.insert %i7, %46[7] : i32 into vector<8xi32>
  store %47, %AIDX[%c3] : memref<4xvector<8xi32>>

  // Zero the accumulator so the kernel's B += A*x yields exactly A*x.
  %vf0 = vector.broadcast %f0 : f32 to vector<8xf32>
  store %vf0, %B[%c0] : memref<1xvector<8xf32>>

  // X[i] = i + 1, i.e. x = ( 1, 2, ..., 8 ).
  scf.for %i = %c0 to %c8 step %c1 {
    %ix = addi %i, %c1 : index
    %kx = index_cast %ix : index to i32
    %fx = sitofp %kx : i32 to f32
    store %fx, %X[%i] : memref<?xf32>
  }

  //
  // Multiply.
  //

  call @spmv8x8(%AVAL, %AIDX, %X, %B) : (memref<4xvector<8xf32>>,
                                         memref<4xvector<8xi32>>,
                                         memref<?xf32>,
                                         memref<1xvector<8xf32>>) -> ()

  //
  // Print and verify.
  //

  scf.for %i = %c0 to %c4 step %c1 {
    %aval = load %AVAL[%i] : memref<4xvector<8xf32>>
    vector.print %aval : vector<8xf32>
  }

  scf.for %i = %c0 to %c4 step %c1 {
    %aidx = load %AIDX[%i] : memref<4xvector<8xi32>>
    vector.print %aidx : vector<8xi32>
  }

  %ldb = load %B[%c0] : memref<1xvector<8xf32>>
  vector.print %ldb : vector<8xf32>

  //
  // CHECK:      ( 1, 1, 1, 3, 5, 3, 4, 3 )
  // CHECK-NEXT: ( 2, 8, 2, 1, 1, 2, 7, 2 )
  // CHECK-NEXT: ( 1, 3, 6, 1, 1, 1, 1, 1 )
  // CHECK-NEXT: ( 1, 1, 2, 1, 1, 2, 1, 1 )
  //
  // CHECK-NEXT: ( 0, 0, 2, 1, 0, 1, 0, 1 )
  // CHECK-NEXT: ( 2, 1, 5, 3, 3, 4, 2, 3 )
  // CHECK-NEXT: ( 5, 4, 6, 5, 4, 5, 4, 6 )
  // CHECK-NEXT: ( 7, 6, 7, 7, 5, 6, 6, 7 )
  //
  // CHECK-NEXT: ( 21, 39, 73, 24, 20, 36, 37, 29 )
  //

  //
  // Free.
  //

  dealloc %AVAL : memref<4xvector<8xf32>>
  dealloc %AIDX : memref<4xvector<8xi32>>
  dealloc %X : memref<?xf32>
  dealloc %B : memref<1xvector<8xf32>>

  return
}