1; RUN: opt < %s -instcombine -S | FileCheck %s
2
3; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
4
; The control byte is a function argument rather than a constant. The expected
; output keeps the intrinsic call exactly as written; the point of the test is
; that the pass handles this input without crashing.
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT:  call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT:  ret <4 x double>
  %unfolded = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %unfolded
}
13
14
15; In the following 4 tests, both zero mask bits of the immediate are set.
16
; Immediate 136 (0x88) on the <4 x double> intrinsic: the expected output is a
; bare return of the all-zero vector.
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT:  ret <4 x double> zeroinitializer
  %zeroed = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %zeroed
}
24
; Immediate 136 (0x88) on the <8 x float> intrinsic: the expected output is a
; bare return of the all-zero vector.
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT:  ret <8 x float> zeroinitializer
  %zeroed = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %zeroed
}
32
; Immediate 136 (0x88) on the <8 x i32> intrinsic: the expected output is a
; bare return of the all-zero vector.
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT:  ret <8 x i32> zeroinitializer
  %zeroed = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %zeroed
}
40
; Immediate 136 (0x88) on the AVX2 <4 x i64> intrinsic: the expected output is
; a bare return of the all-zero vector.
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: @perm2i_0x88
; CHECK-NEXT:  ret <4 x i64> zeroinitializer
  %zeroed = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
  ret <4 x i64> %zeroed
}
48
49
; The other control bits are ignored when both zero mask bits of the immediate are set.
51
; Immediate 255 (0xff), i.e. every bit of the control byte set: the expected
; output is still a bare return of the all-zero vector.
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT:  ret <4 x double> zeroinitializer
  %zeroed = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %zeroed
}
59
60
; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
63
; Immediate 0 (0x00): expected to become a shuffle of %a0 with an undef second
; operand, repeating elements 0-1 in both halves of the result.
define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %perm
}
72
; Immediate 1 (0x01): expected to become a shuffle of %a0 with an undef second
; operand that swaps the two halves (elements 2-3, then 0-1).
define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %perm
}
81
; Immediate 2 (0x02): expected to become a two-source shuffle with %a1 as the
; first operand and %a0 as the second, using mask <0, 1, 4, 5>.
define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %perm
}
90
; Immediate 3 (0x03): expected to become a two-source shuffle with %a1 as the
; first operand and %a0 as the second, using mask <2, 3, 4, 5>.
define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %perm
}
99
; Immediate 16 (0x10): no shuffle is expected at all; the function should
; reduce to returning %a0 directly.
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT:  ret <4 x double> %a0
  %identity = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %identity
}
107
; Immediate 17 (0x11): expected to become a shuffle of %a0 with an undef second
; operand, repeating elements 2-3 in both halves of the result.
define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %perm
}
116
; Immediate 18 (0x12): expected to become a two-source shuffle with %a1 as the
; first operand and %a0 as the second, using mask <0, 1, 6, 7>.
define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %perm
}
125
; Immediate 19 (0x13): expected to become a two-source shuffle with %a1 as the
; first operand and %a0 as the second, using mask <2, 3, 6, 7>.
define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %perm
}
134
; Immediate 32 (0x20): expected to become a two-source shuffle of %a0 and %a1
; (in that order) using mask <0, 1, 4, 5>.
define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %perm
}
143
; Immediate 33 (0x21): expected to become a two-source shuffle of %a0 and %a1
; (in that order) using mask <2, 3, 4, 5>.
define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %perm
}
152
; Immediate 34 (0x22): expected to become a shuffle of %a1 with an undef second
; operand, repeating elements 0-1 in both halves of the result.
define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %perm
}
161
; Immediate 35 (0x23): expected to become a shuffle of %a1 with an undef second
; operand that swaps the two halves (elements 2-3, then 0-1).
define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %perm
}
170
; Immediate 48 (0x30): expected to become a two-source shuffle of %a0 and %a1
; (in that order) using mask <0, 1, 6, 7>.
define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %perm
}
179
; Immediate 49 (0x31): expected to become a two-source shuffle of %a0 and %a1
; (in that order) using mask <2, 3, 6, 7>.
define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %perm
}
188
; Immediate 50 (0x32): no shuffle is expected at all; the function should
; reduce to returning %a1 directly.
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT:  ret <4 x double> %a1
  %identity = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %identity
}
196
; Immediate 51 (0x33): expected to become a shuffle of %a1 with an undef second
; operand, repeating elements 2-3 in both halves of the result.
define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x double> %1
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %perm
}
205
206; Confirm that a mask for 32-bit elements is also correct.
207
; Immediate 49 (0x31) on the <8 x float> intrinsic: expected to become a
; two-source shuffle of %a0 and %a1 whose eight-entry mask picks elements 4-7
; and 12-15.
define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT:  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:  ret <8 x float> %1
  %perm = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %perm
}
216
217
218; Confirm that the AVX2 version works the same.
219
; Immediate 51 (0x33) on the AVX2 <4 x i64> intrinsic: expected to become a
; shuffle of %a1 with an undef second operand, repeating elements 2-3.
define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: @perm2i_0x33
; CHECK-NEXT:  %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x i64> %1
  %perm = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
  ret <4 x i64> %perm
}
228
229
230; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
231
; Immediate 129 (0x81): expected to become a shuffle of %a0 with a constant
; vector whose leading element is 0.0, using mask <2, 3, 4, 5>. The remainder
; of the constant operand is matched with a wildcard, so the pattern spells out
; the ", " separator before the mask operand to keep the wildcard from also
; absorbing the operand boundary.
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x81
; CHECK-NEXT:  shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}
240
; Immediate 131 (0x83): expected to become a shuffle of %a1 with a constant
; vector whose leading element is 0.0, using mask <2, 3, 4, 5>.
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT:  shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
  %half_zero = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %half_zero
}
249
; Immediate 40 (0x28): expected to become a shuffle whose first operand is a
; constant vector with leading element 0.0 and whose second operand is %a1,
; using mask <0, 1, 4, 5>.
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x28
; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
  %half_zero = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
  ret <4 x double> %half_zero
}
258
; Immediate 8 (0x08): expected to become a shuffle whose first operand is a
; constant vector with leading element 0.0 and whose second operand is %a0,
; using mask <0, 1, 4, 5>.
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: @perm2pd_0x08
; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
  %half_zero = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
  ret <4 x double> %half_zero
}
267
268; Check one more with the AVX2 version.
269
; Immediate 40 (0x28) on the AVX2 <4 x i64> intrinsic: expected to become a
; shuffle whose first operand is a constant vector with leading element 0 and
; whose second operand is %a1, using mask <0, 1, 4, 5>.
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: @perm2i_0x28
; CHECK-NEXT:  shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x i64>
  %half_zero = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
  ret <4 x i64> %half_zero
}
278
; Declarations of the intrinsics called by the tests in this file: the AVX2
; integer form first, then the three AVX forms. Each takes two source vectors
; and an i8 control byte.
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) readnone nounwind
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) readnone nounwind
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) readnone nounwind
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) readnone nounwind
283
284