; Test vector subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Test a v16i8 subtraction.
;
; %dummy is unused; with it in the first argument position the pattern below
; expects the operands in %v26 and %v28 and the result in %v24.
define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
; CHECK-LABEL: f1:
; CHECK: vsb %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <16 x i8> %val1, %val2
  ret <16 x i8> %ret
}

; Test a v8i16 subtraction.
;
; %dummy is unused; with it in the first argument position the pattern below
; expects the operands in %v26 and %v28 and the result in %v24.
define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
; CHECK-LABEL: f2:
; CHECK: vsh %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <8 x i16> %val1, %val2
  ret <8 x i16> %ret
}

; Test a v4i32 subtraction.
;
; %dummy is unused; with it in the first argument position the pattern below
; expects the operands in %v26 and %v28 and the result in %v24.
define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
; CHECK-LABEL: f3:
; CHECK: vsf %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <4 x i32> %val1, %val2
  ret <4 x i32> %ret
}

; Test a v2i64 subtraction.
;
; %dummy is unused; with it in the first argument position the pattern below
; expects the operands in %v26 and %v28 and the result in %v24.
define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
; CHECK-LABEL: f4:
; CHECK: vsg %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <2 x i64> %val1, %val2
  ret <2 x i64> %ret
}

; Test a v4f32 subtraction, as an example of an operation that needs to be
; scalarized and reassembled.  At present there's an unnecessary move that
; could be avoided with smarter ordering.  It also isn't important whether
; the element extractions (matched as VREPF below) use the result of the VLRs
; or use %v24 and %v26 directly, although the patterns as written bind them
; to the VLR results.
;
; Pattern variables used below:
;   A1, A2      - register numbers of the VLR copies of %v24 and %v26.
;   B*, C*, D*  - registers receiving the VREPF results for element indices
;                 1, 2 and 3 of the first (*1) and second (*2) operand.
;   A1copy      - extra copy of A1 (the "unnecessary move" mentioned above)
;                 that becomes the destination of the first SEBR.
;   HIGH, LOW   - the two VMRHF results that the final VMRHG combines into
;                 the return register %v24.
; All register numbers are constrained to a single digit in the range 0-5.
; The DAG-style matches may appear in any order; only the final VMRHG and the
; return are ordered after them.
define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
; CHECK-LABEL: f5:
; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24
; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26
; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1
; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1
; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2
; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
; CHECK-DAG: ldr %f[[A1copy:[0-5]]], %f[[A1]]
; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]]
; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
; CHECK-DAG: sebr %f[[D1]], %f[[D2]]
; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1copy]], %v[[B1]]
; CHECK-DAG: vmrhf [[LOW:%v[0-9]+]], %v[[C1]], %v[[D1]]
; CHECK: vmrhg %v24, [[HIGH]], [[LOW]]
; CHECK: br %r14
  %ret = fsub <4 x float> %val1, %val2
  ret <4 x float> %ret
}

; Test a v2f64 subtraction.
;
; Unlike the v4f32 case, the pattern below expects a single vector
; instruction.  %dummy is unused; with it in the first argument position the
; operands are expected in %v26 and %v28 and the result in %v24.
define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
                        <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: vfsdb %v24, %v26, %v28
; CHECK: br %r14
  %ret = fsub <2 x double> %val1, %val2
  ret <2 x double> %ret
}

; Test an f64 subtraction that uses vector registers.
;
; Element 0 of each vector argument is extracted and the two scalars are
; subtracted.  The pattern below expects one WFSDB that reads the argument
; registers %v24 and %v26 directly and writes the scalar result to %f0.
define double @f7(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f7:
; CHECK: wfsdb %f0, %v24, %v26
; CHECK: br %r14
  %scalar1 = extractelement <2 x double> %val1, i32 0
  %scalar2 = extractelement <2 x double> %val2, i32 0
  %ret = fsub double %scalar1, %scalar2
  ret double %ret
}

; Test a v2i8 subtraction, which gets promoted to v16i8.
;
; The expected instruction is the same VSB as in the full-width v16i8 test.
; %dummy is unused; with it in the first argument position the operands are
; expected in %v26 and %v28 and the result in %v24.
define <2 x i8> @f8(<2 x i8> %dummy, <2 x i8> %val1, <2 x i8> %val2) {
; CHECK-LABEL: f8:
; CHECK: vsb %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <2 x i8> %val1, %val2
  ret <2 x i8> %ret
}

; Test a v4i8 subtraction, which gets promoted to v16i8.
;
; The expected instruction is the same VSB as in the full-width v16i8 test.
; %dummy is unused; with it in the first argument position the operands are
; expected in %v26 and %v28 and the result in %v24.
define <4 x i8> @f9(<4 x i8> %dummy, <4 x i8> %val1, <4 x i8> %val2) {
; CHECK-LABEL: f9:
; CHECK: vsb %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <4 x i8> %val1, %val2
  ret <4 x i8> %ret
}

; Test a v8i8 subtraction, which gets promoted to v16i8.
;
; The expected instruction is the same VSB as in the full-width v16i8 test.
; %dummy is unused; with it in the first argument position the operands are
; expected in %v26 and %v28 and the result in %v24.
define <8 x i8> @f10(<8 x i8> %dummy, <8 x i8> %val1, <8 x i8> %val2) {
; CHECK-LABEL: f10:
; CHECK: vsb %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <8 x i8> %val1, %val2
  ret <8 x i8> %ret
}

; Test a v2i16 subtraction, which gets promoted to v8i16.
;
; The expected instruction is the same VSH as in the full-width v8i16 test.
; %dummy is unused; with it in the first argument position the operands are
; expected in %v26 and %v28 and the result in %v24.
define <2 x i16> @f11(<2 x i16> %dummy, <2 x i16> %val1, <2 x i16> %val2) {
; CHECK-LABEL: f11:
; CHECK: vsh %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <2 x i16> %val1, %val2
  ret <2 x i16> %ret
}

; Test a v4i16 subtraction, which gets promoted to v8i16.
;
; The expected instruction is the same VSH as in the full-width v8i16 test.
; %dummy is unused; with it in the first argument position the operands are
; expected in %v26 and %v28 and the result in %v24.
define <4 x i16> @f12(<4 x i16> %dummy, <4 x i16> %val1, <4 x i16> %val2) {
; CHECK-LABEL: f12:
; CHECK: vsh %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <4 x i16> %val1, %val2
  ret <4 x i16> %ret
}

; Test a v2i32 subtraction, which gets promoted to v4i32.
;
; The expected instruction is the same VSF as in the full-width v4i32 test.
; %dummy is unused; with it in the first argument position the operands are
; expected in %v26 and %v28 and the result in %v24.
define <2 x i32> @f13(<2 x i32> %dummy, <2 x i32> %val1, <2 x i32> %val2) {
; CHECK-LABEL: f13:
; CHECK: vsf %v24, %v26, %v28
; CHECK: br %r14
  %ret = sub <2 x i32> %val1, %val2
  ret <2 x i32> %ret
}

; Test a v2f32 subtraction, which gets promoted to v4f32.
;
; No particular instruction sequence is expected, but the function must
; compile.  The label match below only verifies that the function is emitted;
; it also ends the match region of the preceding function so that function's
; return pattern cannot be satisfied by this function's return.
define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) {
; CHECK-LABEL: f14:
  %ret = fsub <2 x float> %val1, %val2
  ret <2 x float> %ret
}
