1// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
2// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
3// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
4// RUN: FileCheck %s
5
// Thin wrapper around vector.compressstore: hands the buffer %base, the lane
// mask %mask and the 16-lane value vector %value straight to the op, so the
// driver can invoke it through a plain function call.
func @compress16(%base: memref<?xf32>, %mask: vector<16xi1>,
                 %value: vector<16xf32>) {
  vector.compressstore %base, %mask, %value :
    memref<?xf32>, vector<16xi1>, vector<16xf32>
  return
}
12
// Prints elements 0..15 of %A as one vector<16xf32>. The elements are loaded
// one at a time and inserted into an accumulator vector that starts out as
// all zeros; the finished vector is passed to vector.print.
func @printmem16(%A: memref<?xf32>) {
  // Zero-initialized accumulator carried through the loop.
  %zero = constant 0.0 : f32
  %init = vector.broadcast %zero : f32 to vector<16xf32>
  // Loop bounds: one iteration per element, indices 0 through 15.
  %lb = constant 0 : index
  %ub = constant 16 : index
  %step = constant 1 : index
  %result = scf.for %idx = %lb to %ub step %step
      iter_args(%acc = %init) -> (vector<16xf32>) {
    // vector.insertelement is given an i32 position here, so cast the index.
    %pos = index_cast %idx : index to i32
    %elem = load %A[%idx] : memref<?xf32>
    %next = vector.insertelement %elem, %acc[%pos : i32] : vector<16xf32>
    scf.yield %next : vector<16xf32>
  }
  vector.print %result : vector<16xf32>
  return
}
29
// Test driver. Allocates a 16-element buffer filled with zeros, builds the
// value vector (0.0, 1.0, ..., 15.0) and a set of lane masks, then calls
// @compress16 once per mask and prints the buffer after every call so that
// FileCheck can verify the stored contents.
func @entry() {
  // Set up memory.
  %c0 = constant 0: index
  %c1 = constant 1: index
  %c16 = constant 16: index
  %A = alloc(%c16) : memref<?xf32>
  %z = constant 0.0: f32
  %v = vector.broadcast %z : f32 to vector<16xf32>
  // One pass does two things: zeroes A[i] and sets lane i of the value
  // vector to float(i).
  %value = scf.for %i = %c0 to %c16 step %c1
    iter_args(%v_iter = %v) -> (vector<16xf32>) {
    store %z, %A[%i] : memref<?xf32>
    %i32 = index_cast %i : index to i32
    %fi = sitofp %i32 : i32 to f32
    %v_new = vector.insertelement %fi, %v_iter[%i32 : i32] : vector<16xf32>
    scf.yield %v_new : vector<16xf32>
  }

  // Set up masks. Enabled lanes of each mask:
  //   %none  : (no lanes)
  //   %all   : 0 .. 15
  //   %some1 : 0, 1, 2, 3
  //   %some2 : 1, 2, 3, 7, 11, 13, 15
  //   %some3 : 1, 3, 7, 11, 13, 15
  %f = constant 0: i1
  %t = constant 1: i1
  %none = vector.constant_mask [0] : vector<16xi1>
  %all = vector.constant_mask [16] : vector<16xi1>
  %some1 = vector.constant_mask [4] : vector<16xi1>
  %0 = vector.insert %f, %some1[0] : i1 into vector<16xi1>
  %1 = vector.insert %t, %0[7] : i1 into vector<16xi1>
  %2 = vector.insert %t, %1[11] : i1 into vector<16xi1>
  %3 = vector.insert %t, %2[13] : i1 into vector<16xi1>
  %some2 = vector.insert %t, %3[15] : i1 into vector<16xi1>
  %some3 = vector.insert %f, %some2[2] : i1 into vector<16xi1>

  //
  // Compressing store tests.
  //
  // The buffer is not reset between calls: each expected line below shows the
  // newly stored prefix (the values of the enabled lanes, packed at the front)
  // followed by whatever earlier calls left in the remaining elements.
  //

  call @compress16(%A, %none, %value)
    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
  call @printmem16(%A) : (memref<?xf32>) -> ()
  // CHECK: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )

  call @compress16(%A, %all, %value)
    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
  call @printmem16(%A) : (memref<?xf32>) -> ()
  // CHECK-NEXT: ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 )

  call @compress16(%A, %some3, %value)
    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
  call @printmem16(%A) : (memref<?xf32>) -> ()
  // CHECK-NEXT: ( 1, 3, 7, 11, 13, 15, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 )

  call @compress16(%A, %some2, %value)
    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
  call @printmem16(%A) : (memref<?xf32>) -> ()
  // CHECK-NEXT: ( 1, 2, 3, 7, 11, 13, 15, 7, 8, 9, 10, 11, 12, 13, 14, 15 )

  call @compress16(%A, %some1, %value)
    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
  call @printmem16(%A) : (memref<?xf32>) -> ()
  // CHECK-NEXT: ( 0, 1, 2, 3, 11, 13, 15, 7, 8, 9, 10, 11, 12, 13, 14, 15 )

  // Release the buffer allocated at the top of the function.
  dealloc %A : memref<?xf32>
  return
}
91