; Test various target-specific DAG combiner patterns.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; Check that an extraction followed by a truncation is effectively treated
; as a bitcast.
;
; i32 elements 0 and 3 of the vector sum are truncated to i8 and stored.
; The expected code stores each byte straight out of the VAF result with
; VSTEB: byte 3 of the register for the truncation of element 0 and byte 15
; for the truncation of element 3.
define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %elem1 = extractelement <4 x i32> %add, i32 0
  %elem2 = extractelement <4 x i32> %add, i32 3
  %trunc1 = trunc i32 %elem1 to i8
  %trunc2 = trunc i32 %elem2 to i8
  store i8 %trunc1, i8 *%ptr1
  store i8 %trunc2, i8 *%ptr2
  ret void
}

; Test a case where a pack-type shuffle can be eliminated.
;
; The shuffle picks the odd-numbered i32 elements of the two sums, and the
; result is only read as two i16 elements (1 and 7). The expected code
; emits no pack instruction and instead extracts the halfwords directly
; from the two VAF results: halfword 3 of the first sum and halfword 7 of
; the second.
define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: f2:
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
; CHECK: br %r14
  %add1 = add <4 x i32> %v1, %v2
  %add2 = add <4 x i32> %v2, %v3
  %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2,
                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 1
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; ...and again in a case where there's also a splat and a bitcast.
;
; The second shuffle operand is a splat of doubleword 0 of %v3, bitcast to
; <4 x i32>. The expected code emits neither the replicate nor the pack:
; i16 element 2 of the result is read as halfword 6 of the VAF result and
; i16 element 7 is read as halfword 3 of %v28, the register the checks use
; for the unsplatted %v3.
define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f3:
; CHECK-NOT: vrepg
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 2
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; ...and again with a merge low instead of a pack.
;
; The shuffle mask <2, 6, 3, 7> interleaves elements 2 and 3 of the sum
; with elements 2 and 3 of the splat. The expected code emits neither the
; replicate nor a merge: i16 element 4 of the result is read as halfword 6
; of the VAF result and i16 element 7 is read as halfword 3 of %v28.
define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f4:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 4
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; ...and again with a merge high.
;
; The shuffle mask <0, 4, 1, 5> interleaves elements 0 and 1 of the sum
; with elements 0 and 1 of the splat. The expected code emits neither the
; replicate nor a merge: i16 element 4 of the result is read as halfword 2
; of the VAF result and i16 element 7 is read as halfword 3 of %v28.
define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f5:
; CHECK-NOT: vrepg
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 4
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}

; Test a case where an unpack high can be eliminated from the usual
; load-extend sequence.
;
; An <8 x i8> vector is loaded and sign-extended to <8 x i16>, but each
; extracted element is immediately truncated back to i8. The expected code
; keeps the VLREPG load, emits no instruction matching "vup", and stores
; bytes 1, 2 and 7 of the loaded register directly with VSTEB.
define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
; CHECK-LABEL: f6:
; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2)
; CHECK-NOT: vup
; CHECK-DAG: vsteb [[REG]], 0(%r3), 1
; CHECK-DAG: vsteb [[REG]], 0(%r4), 2
; CHECK-DAG: vsteb [[REG]], 0(%r5), 7
; CHECK: br %r14
  %vec = load <8 x i8>, <8 x i8> *%ptr1
  %ext = sext <8 x i8> %vec to <8 x i16>
  %elem1 = extractelement <8 x i16> %ext, i32 1
  %elem2 = extractelement <8 x i16> %ext, i32 2
  %elem3 = extractelement <8 x i16> %ext, i32 7
  %trunc1 = trunc i16 %elem1 to i8
  %trunc2 = trunc i16 %elem2 to i8
  %trunc3 = trunc i16 %elem3 to i8
  store i8 %trunc1, i8 *%ptr2
  store i8 %trunc2, i8 *%ptr3
  store i8 %trunc3, i8 *%ptr4
  ret void
}

; ...and again with a bitcast in between.
;
; A <4 x i8> vector is loaded, sign-extended to <4 x i32> and bitcast to
; <8 x i16>. i16 elements 1, 3 and 7 are then truncated to i8. The expected
; code keeps the VLREPF load, emits no instruction matching "vup", and
; stores bytes 0, 1 and 3 of the loaded register directly with VSTEB.
define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
; CHECK-LABEL: f7:
; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2)
; CHECK-NOT: vup
; CHECK-DAG: vsteb [[REG]], 0(%r3), 0
; CHECK-DAG: vsteb [[REG]], 0(%r4), 1
; CHECK-DAG: vsteb [[REG]], 0(%r5), 3
; CHECK: br %r14
  %vec = load <4 x i8>, <4 x i8> *%ptr1
  %ext = sext <4 x i8> %vec to <4 x i32>
  %bitcast = bitcast <4 x i32> %ext to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 1
  %elem2 = extractelement <8 x i16> %bitcast, i32 3
  %elem3 = extractelement <8 x i16> %bitcast, i32 7
  %trunc1 = trunc i16 %elem1 to i8
  %trunc2 = trunc i16 %elem2 to i8
  %trunc3 = trunc i16 %elem3 to i8
  store i8 %trunc1, i8 *%ptr2
  store i8 %trunc2, i8 *%ptr3
  store i8 %trunc3, i8 *%ptr4
  ret void
}
