// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Illustrates an 8x8 Sparse Matrix x Vector implemented with only operations
// of the vector dialect (and some std/scf). Essentially, this example performs
// the following multiplication:
//
//     0  1  2  3  4  5  6  7
//   +------------------------+
// 0 | 1  0  2  0  0  1  0  1 |   | 1 |   | 21 |
// 1 | 1  8  0  0  3  0  1  0 |   | 2 |   | 39 |
// 2 | 0  0  1  0  0  2  6  2 |   | 3 |   | 73 |
// 3 | 0  3  0  1  0  1  0  1 | x | 4 | = | 24 |
// 4 | 5  0  0  1  1  1  0  0 |   | 5 |   | 20 |
// 5 | 0  3  0  0  2  1  2  0 |   | 6 |   | 36 |
// 6 | 4  0  7  0  1  0  1  0 |   | 7 |   | 37 |
// 7 | 0  3  0  2  0  0  1  1 |   | 8 |   | 29 |
//   +------------------------+
//
// The sparse storage scheme used is an extended column scheme (also referred
// to as jagged diagonal), which is essentially a vector-friendly variant of
// the general sparse row-wise scheme (also called compressed row storage),
// using fixed-length vectors and no explicit pointer indexing into the
// value array to find the rows.
//
// The extended column storage for the matrix shown above is as follows.
//
//      VALUE           INDEX
//   +---------+     +---------+
// 0 | 1 2 1 1 |     | 0 2 5 7 |
// 1 | 1 8 3 1 |     | 0 1 4 6 |
// 2 | 1 2 6 2 |     | 2 5 6 7 |
// 3 | 3 1 1 1 |     | 1 3 5 7 |
// 4 | 5 1 1 1 |     | 0 3 4 5 |
// 5 | 3 2 1 2 |     | 1 4 5 6 |
// 6 | 4 7 1 1 |     | 0 2 4 6 |
// 7 | 3 2 1 1 |     | 1 3 6 7 |
//   +---------+     +---------+
//
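// For example, row 0 of the matrix has nonzeros 1, 2, 1, 1 at columns
// 0, 2, 5, 7, which become row 0 of the VALUE and INDEX tables above.
//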
// This example illustrates an effective SAXPY version that operates
// on the transposed jagged diagonal storage to obtain higher vector
// lengths. Another example in this directory illustrates a DOT
// version of the operation.
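//
// As a rough scalar sketch (C-style pseudocode only, not part of the test),
// the kernel @spmv8x8 below effectively computes:
//
//   for (k = 0; k < 4; k++)       // one column of the jagged storage
//     for (i = 0; i < 8; i++)     // all 8 rows at once, as one vector op
//       b[i] += AVAL[k][i] * x[AIDX[k][i]];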

func @spmv8x8(%AVAL: memref<4xvector<8xf32>>,
              %AIDX: memref<4xvector<8xi32>>,
              %X: memref<?xf32>, %B: memref<1xvector<8xf32>>) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %cn = constant 4 : index
  %mask = vector.constant_mask [8] : vector<8xi1>
  %b = load %B[%c0] : memref<1xvector<8xf32>>
  // Accumulate b += AVAL[k] * X[AIDX[k]] over the four columns of the
  // jagged diagonal storage, processing all eight rows per iteration.
  %b_out = scf.for %k = %c0 to %cn step %c1 iter_args(%b_iter = %b) -> (vector<8xf32>) {
    %aval = load %AVAL[%k] : memref<4xvector<8xf32>>
    %aidx = load %AIDX[%k] : memref<4xvector<8xi32>>
    // Gather the x values selected by this column of indices and fuse the
    // multiply-add into the running accumulator.
    %0 = vector.gather %X, %aidx, %mask : (memref<?xf32>, vector<8xi32>, vector<8xi1>) -> vector<8xf32>
    %b_new = vector.fma %aval, %0, %b_iter : vector<8xf32>
    scf.yield %b_new : vector<8xf32>
  }
  store %b_out, %B[%c0] : memref<1xvector<8xf32>>
  return
}

func @entry() {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %c3 = constant 3 : index
  %c4 = constant 4 : index
  %c5 = constant 5 : index
  %c6 = constant 6 : index
  %c7 = constant 7 : index
  %c8 = constant 8 : index

  %f0 = constant 0.0 : f32
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  %f3 = constant 3.0 : f32
  %f4 = constant 4.0 : f32
  %f5 = constant 5.0 : f32
  %f6 = constant 6.0 : f32
  %f7 = constant 7.0 : f32
  %f8 = constant 8.0 : f32

  %i0 = constant 0 : i32
  %i1 = constant 1 : i32
  %i2 = constant 2 : i32
  %i3 = constant 3 : i32
  %i4 = constant 4 : i32
  %i5 = constant 5 : i32
  %i6 = constant 6 : i32
  %i7 = constant 7 : i32

  //
  // Allocate.
  //

  %AVAL = alloc()    {alignment = 64} : memref<4xvector<8xf32>>
  %AIDX = alloc()    {alignment = 64} : memref<4xvector<8xi32>>
  %X    = alloc(%c8) {alignment = 64} : memref<?xf32>
  %B    = alloc()    {alignment = 64} : memref<1xvector<8xf32>>

  //
  // Initialize.
  //
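  // The 8-vectors stored into AVAL and AIDX below are the columns of the
  // VALUE and INDEX tables shown at the top (i.e., the transposed jagged
  // diagonal storage): AVAL[k][i] holds the k-th stored nonzero of row i,
  // and AIDX[k][i] holds its column index.
  //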

  %vf1 = vector.broadcast %f1 : f32 to vector<8xf32>

  %0 = vector.insert %f3, %vf1[3] : f32 into vector<8xf32>
  %1 = vector.insert %f5, %0[4] : f32 into vector<8xf32>
  %2 = vector.insert %f3, %1[5] : f32 into vector<8xf32>
  %3 = vector.insert %f4, %2[6] : f32 into vector<8xf32>
  %4 = vector.insert %f3, %3[7] : f32 into vector<8xf32>
  store %4, %AVAL[%c0] : memref<4xvector<8xf32>>

  %5 = vector.insert %f2, %vf1[0] : f32 into vector<8xf32>
  %6 = vector.insert %f8, %5[1] : f32 into vector<8xf32>
  %7 = vector.insert %f2, %6[2] : f32 into vector<8xf32>
  %8 = vector.insert %f2, %7[5] : f32 into vector<8xf32>
  %9 = vector.insert %f7, %8[6] : f32 into vector<8xf32>
  %10 = vector.insert %f2, %9[7] : f32 into vector<8xf32>
  store %10, %AVAL[%c1] : memref<4xvector<8xf32>>

  %11 = vector.insert %f3, %vf1[1] : f32 into vector<8xf32>
  %12 = vector.insert %f6, %11[2] : f32 into vector<8xf32>
  store %12, %AVAL[%c2] : memref<4xvector<8xf32>>

  %13 = vector.insert %f2, %vf1[2] : f32 into vector<8xf32>
  %14 = vector.insert %f2, %13[5] : f32 into vector<8xf32>
  store %14, %AVAL[%c3] : memref<4xvector<8xf32>>

  %vi0 = vector.broadcast %i0 : i32 to vector<8xi32>

  %20 = vector.insert %i2, %vi0[2] : i32 into vector<8xi32>
  %21 = vector.insert %i1, %20[3] : i32 into vector<8xi32>
  %22 = vector.insert %i1, %21[5] : i32 into vector<8xi32>
  %23 = vector.insert %i1, %22[7] : i32 into vector<8xi32>
  store %23, %AIDX[%c0] : memref<4xvector<8xi32>>

  %24 = vector.insert %i2, %vi0[0] : i32 into vector<8xi32>
  %25 = vector.insert %i1, %24[1] : i32 into vector<8xi32>
  %26 = vector.insert %i5, %25[2] : i32 into vector<8xi32>
  %27 = vector.insert %i3, %26[3] : i32 into vector<8xi32>
  %28 = vector.insert %i3, %27[4] : i32 into vector<8xi32>
  %29 = vector.insert %i4, %28[5] : i32 into vector<8xi32>
  %30 = vector.insert %i2, %29[6] : i32 into vector<8xi32>
  %31 = vector.insert %i3, %30[7] : i32 into vector<8xi32>
  store %31, %AIDX[%c1] : memref<4xvector<8xi32>>

  %32 = vector.insert %i5, %vi0[0] : i32 into vector<8xi32>
  %33 = vector.insert %i4, %32[1] : i32 into vector<8xi32>
  %34 = vector.insert %i6, %33[2] : i32 into vector<8xi32>
  %35 = vector.insert %i5, %34[3] : i32 into vector<8xi32>
  %36 = vector.insert %i4, %35[4] : i32 into vector<8xi32>
  %37 = vector.insert %i5, %36[5] : i32 into vector<8xi32>
  %38 = vector.insert %i4, %37[6] : i32 into vector<8xi32>
  %39 = vector.insert %i6, %38[7] : i32 into vector<8xi32>
  store %39, %AIDX[%c2] : memref<4xvector<8xi32>>

  %40 = vector.insert %i7, %vi0[0] : i32 into vector<8xi32>
  %41 = vector.insert %i6, %40[1] : i32 into vector<8xi32>
  %42 = vector.insert %i7, %41[2] : i32 into vector<8xi32>
  %43 = vector.insert %i7, %42[3] : i32 into vector<8xi32>
  %44 = vector.insert %i5, %43[4] : i32 into vector<8xi32>
  %45 = vector.insert %i6, %44[5] : i32 into vector<8xi32>
  %46 = vector.insert %i6, %45[6] : i32 into vector<8xi32>
  %47 = vector.insert %i7, %46[7] : i32 into vector<8xi32>
  store %47, %AIDX[%c3] : memref<4xvector<8xi32>>

  %vf0 = vector.broadcast %f0 : f32 to vector<8xf32>
  store %vf0, %B[%c0] : memref<1xvector<8xf32>>

  // Set up the dense vector x = ( 1, 2, ..., 8 ).
  scf.for %i = %c0 to %c8 step %c1 {
    %ix = addi %i, %c1 : index
    %kx = index_cast %ix : index to i32
    %fx = sitofp %kx : i32 to f32
    store %fx, %X[%i] : memref<?xf32>
  }

  //
  // Multiply.
  //

  call @spmv8x8(%AVAL, %AIDX, %X, %B) : (memref<4xvector<8xf32>>,
                                         memref<4xvector<8xi32>>,
                                         memref<?xf32>,
                                         memref<1xvector<8xf32>>) -> ()

  //
  // Print and verify.
  //

  scf.for %i = %c0 to %c4 step %c1 {
    %aval = load %AVAL[%i] : memref<4xvector<8xf32>>
    vector.print %aval : vector<8xf32>
  }

  scf.for %i = %c0 to %c4 step %c1 {
    %aidx = load %AIDX[%i] : memref<4xvector<8xi32>>
    vector.print %aidx : vector<8xi32>
  }

  %ldb = load %B[%c0] : memref<1xvector<8xf32>>
  vector.print %ldb : vector<8xf32>

  //
  // CHECK:      ( 1, 1, 1, 3, 5, 3, 4, 3 )
  // CHECK-NEXT: ( 2, 8, 2, 1, 1, 2, 7, 2 )
  // CHECK-NEXT: ( 1, 3, 6, 1, 1, 1, 1, 1 )
  // CHECK-NEXT: ( 1, 1, 2, 1, 1, 2, 1, 1 )
  //
  // CHECK-NEXT: ( 0, 0, 2, 1, 0, 1, 0, 1 )
  // CHECK-NEXT: ( 2, 1, 5, 3, 3, 4, 2, 3 )
  // CHECK-NEXT: ( 5, 4, 6, 5, 4, 5, 4, 6 )
  // CHECK-NEXT: ( 7, 6, 7, 7, 5, 6, 6, 7 )
  //
  // CHECK-NEXT: ( 21, 39, 73, 24, 20, 36, 37, 29 )
  //
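  // For instance, the first result entry follows from row 0 of the matrix:
  // 1*1 + 2*3 + 1*6 + 1*8 = 21.
  //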

  //
  // Free.
  //

  dealloc %AVAL : memref<4xvector<8xf32>>
  dealloc %AIDX : memref<4xvector<8xi32>>
  dealloc %X    : memref<?xf32>
  dealloc %B    : memref<1xvector<8xf32>>

  return
}