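; Check the AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.
; Every RUN line sends llc's output to /dev/null, so the test only verifies that
; llc succeeds on each function for each CPU/feature combination.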
2
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx                             -o /dev/null
4; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
8; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
9; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
10
; 256-bit integer AND: computes (%a + 1) & %b on each i64 lane.
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic op that follows.
  %inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %res = and <4 x i64> %inc, %b
  ret <4 x i64> %res
}
17
; 128-bit integer AND: computes (%a + 1) & %b on each i64 lane.
define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic op that follows.
  %inc = add <2 x i64> %a, <i64 1, i64 1>
  %res = and <2 x i64> %inc, %b
  ret <2 x i64> %res
}
24
; 256-bit integer AND-NOT: computes %a & ~(%a + 1) on each i64 lane.
; %b is accepted but not used.
define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic ops that follow.
  %inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  ; XOR with all-ones is a bitwise NOT.
  %inverted = xor <4 x i64> %inc, <i64 -1, i64 -1, i64 -1, i64 -1>
  %res = and <4 x i64> %a, %inverted
  ret <4 x i64> %res
}
32
; 128-bit integer AND-NOT: computes %a & ~(%a + 1) on each i64 lane.
; %b is accepted but not used.
define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic ops that follow.
  %inc = add <2 x i64> %a, <i64 1, i64 1>
  ; XOR with all-ones is a bitwise NOT.
  %inverted = xor <2 x i64> %inc, <i64 -1, i64 -1>
  %res = and <2 x i64> %a, %inverted
  ret <2 x i64> %res
}
40
; 256-bit integer OR: computes (%a + 1) | %b on each i64 lane.
define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic op that follows.
  %inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %res = or <4 x i64> %inc, %b
  ret <4 x i64> %res
}
47
; 256-bit integer XOR: computes (%a + 1) ^ %b on each i64 lane.
define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic op that follows.
  %inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %res = xor <4 x i64> %inc, %b
  ret <4 x i64> %res
}
54
; 128-bit integer OR: computes (%a + 1) | %b on each i64 lane.
define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic op that follows.
  %inc = add <2 x i64> %a, <i64 1, i64 1>
  %res = or <2 x i64> %inc, %b
  ret <2 x i64> %res
}
61
; 128-bit integer XOR: computes (%a + 1) ^ %b on each i64 lane.
define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; The integer add pins the execution domain of the logic op that follows.
  %inc = add <2 x i64> %a, <i64 1, i64 1>
  %res = xor <2 x i64> %inc, %b
  ret <2 x i64> %res
}
68
; Lane-wise addition of two <4 x i64> vectors.
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %sum = add <4 x i64> %i, %j
  ret <4 x i64> %sum
}
73
; Lane-wise addition of two <8 x i32> vectors.
define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %sum = add <8 x i32> %i, %j
  ret <8 x i32> %sum
}
78
; Lane-wise addition of two <16 x i16> vectors.
define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %sum = add <16 x i16> %i, %j
  ret <16 x i16> %sum
}
83
; Lane-wise addition of two <32 x i8> vectors.
define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %sum = add <32 x i8> %i, %j
  ret <32 x i8> %sum
}
88
; Lane-wise subtraction %i - %j on <4 x i64> vectors.
define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %diff = sub <4 x i64> %i, %j
  ret <4 x i64> %diff
}
93
; Lane-wise subtraction %i - %j on <8 x i32> vectors.
define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %diff = sub <8 x i32> %i, %j
  ret <8 x i32> %diff
}
98
; Lane-wise subtraction %i - %j on <16 x i16> vectors.
define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %diff = sub <16 x i16> %i, %j
  ret <16 x i16> %diff
}
103
; Lane-wise subtraction %i - %j on <32 x i8> vectors.
define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %diff = sub <32 x i8> %i, %j
  ret <32 x i8> %diff
}
108
; Lane-wise multiplication of two <16 x i16> vectors (low 16 bits kept).
define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %prod = mul <16 x i16> %i, %j
  ret <16 x i16> %prod
}
113
; Signed %i < %j per i32 lane, sign-extended to an all-ones/all-zeros lane mask.
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %lt = icmp slt <8 x i32> %i, %j
  %mask = sext <8 x i1> %lt to <8 x i32>
  ret <8 x i32> %mask
}
119
; %i == %j per i8 lane, sign-extended to an all-ones/all-zeros lane mask.
define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %eq = icmp eq <32 x i8> %i, %j
  %mask = sext <32 x i1> %eq to <32 x i8>
  ret <32 x i8> %mask
}
125
; %i == %j per i16 lane, sign-extended to an all-ones/all-zeros lane mask.
define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %eq = icmp eq <16 x i16> %i, %j
  %mask = sext <16 x i1> %eq to <16 x i16>
  ret <16 x i16> %mask
}
131
; Signed %i < %j per i8 lane, sign-extended to an all-ones/all-zeros lane mask.
define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %lt = icmp slt <32 x i8> %i, %j
  %mask = sext <32 x i1> %lt to <32 x i8>
  ret <32 x i8> %mask
}
137
; Signed %i < %j per i16 lane, sign-extended to an all-ones/all-zeros lane mask.
define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %lt = icmp slt <16 x i16> %i, %j
  %mask = sext <16 x i1> %lt to <16 x i16>
  ret <16 x i16> %mask
}
143
; %i == %j per i32 lane, sign-extended to an all-ones/all-zeros lane mask.
define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %eq = icmp eq <8 x i32> %i, %j
  %mask = sext <8 x i1> %eq to <8 x i32>
  ret <8 x i32> %mask
}
149
; Lane-wise addition of two <2 x i64> vectors.
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %sum = add <2 x i64> %i, %j
  ret <2 x i64> %sum
}
154
; Lane-wise addition of two <4 x i32> vectors.
define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %sum = add <4 x i32> %i, %j
  ret <4 x i32> %sum
}
159
; Lane-wise addition of two <8 x i16> vectors.
define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %sum = add <8 x i16> %i, %j
  ret <8 x i16> %sum
}
164
; Lane-wise addition of two <16 x i8> vectors.
define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %sum = add <16 x i8> %i, %j
  ret <16 x i8> %sum
}
169
; Lane-wise subtraction %i - %j on <2 x i64> vectors.
define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %diff = sub <2 x i64> %i, %j
  ret <2 x i64> %diff
}
174
; Lane-wise subtraction %i - %j on <4 x i32> vectors.
define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %diff = sub <4 x i32> %i, %j
  ret <4 x i32> %diff
}
179
; Lane-wise subtraction %i - %j on <8 x i16> vectors.
define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %diff = sub <8 x i16> %i, %j
  ret <8 x i16> %diff
}
184
; Lane-wise subtraction %i - %j on <16 x i8> vectors.
define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %diff = sub <16 x i8> %i, %j
  ret <16 x i8> %diff
}
189
; Lane-wise multiplication of two <8 x i16> vectors (low 16 bits kept).
define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %prod = mul <8 x i16> %i, %j
  ret <8 x i16> %prod
}
194
; Signed %i < %j per i16 lane, sign-extended to an all-ones/all-zeros lane mask.
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %lt = icmp slt <8 x i16> %i, %j
  %mask = sext <8 x i1> %lt to <8 x i16>
  ret <8 x i16> %mask
}
200
; Signed %i < %j per i8 lane, sign-extended to an all-ones/all-zeros lane mask.
define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %lt = icmp slt <16 x i8> %i, %j
  %mask = sext <16 x i1> %lt to <16 x i8>
  ret <16 x i8> %mask
}
206
; %i == %j per i16 lane, sign-extended to an all-ones/all-zeros lane mask.
define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %eq = icmp eq <8 x i16> %i, %j
  %mask = sext <8 x i1> %eq to <8 x i16>
  ret <8 x i16> %mask
}
212
; %i == %j per i8 lane, sign-extended to an all-ones/all-zeros lane mask.
define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %eq = icmp eq <16 x i8> %i, %j
  %mask = sext <16 x i1> %eq to <16 x i8>
  ret <16 x i8> %mask
}
218
; Concatenation-style shuffle: the upper four i16 lanes of %a followed by the
; lower four i16 lanes of %b.
define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %res
}
223
; Within each 128-bit half: the last i16 lane of %b's half (indices 23 and 31)
; followed by the first seven i16 lanes of %a's half.
define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %res = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %res
}
228
; The last byte of %b (index 31) followed by the first fifteen bytes of %a.
define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %res
}
233
; Mostly-undef byte shuffle: result lanes 1-15 take bytes 0-14 of %a, lane 16
; takes the last byte of %b (index 63); every other lane is undef.
define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %res = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %res
}
238
; The high i64 lane of %a followed by the low i64 lane of %b.
define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %res = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %res
}
243
; The last i32 lane of %b (index 7) followed by the first three lanes of %a.
define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %res
}
248
; Within each 128-bit half: the last i32 lane of %b's half (indices 11 and 15)
; followed by the first three i32 lanes of %a's half.
define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %res = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %res
}
253
; Two-input double shuffle producing <b[1], a[1], b[2], a[3]>.
define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %res = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %res
}
258
; Builds <a[1], 0.0> via a shuffle through a <4 x float> bitcast.
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  ; <0.0, a[1]>: lane 0 comes from the zero vector, lane 1 from %a.
  %zero_lo = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %as_f32 = bitcast <2 x double> %zero_lo to <4 x float>
  ; Swap the two 64-bit halves, expressed on 32-bit lanes.
  %swapped = shufflevector <4 x float> %as_f32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %res = bitcast <4 x float> %swapped to <2 x double>
  ret <2 x double> %res
}
266
; Zero vector with a[0] placed in lane 7 (index 16) and a[8] placed in lane 15
; (index 24); index 0 selects an element of the zeroinitializer operand.
define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %res = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %res
}
271
; Returns lane 0 of %x and stores lane 1 to %dst (byte-aligned store).
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %lane_ret = extractelement <2 x i64> %x, i32 0
  %lane_mem = extractelement <2 x i64> %x, i32 1
  store i64 %lane_mem, i64* %dst, align 1
  ret i64 %lane_ret
}
278
; Returns lane 1 of %x and stores lane 3 to %dst (byte-aligned store).
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %lane_ret = extractelement <4 x i32> %x, i32 1
  %lane_mem = extractelement <4 x i32> %x, i32 3
  store i32 %lane_mem, i32* %dst, align 1
  ret i32 %lane_ret
}
285
; Returns lane 1 of %x and stores lane 3 to %dst (byte-aligned store).
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %lane_ret = extractelement <8 x i16> %x, i32 1
  %lane_mem = extractelement <8 x i16> %x, i32 3
  store i16 %lane_mem, i16* %dst, align 1
  ret i16 %lane_ret
}
292
; Returns lane 1 of %x and stores lane 3 to %dst (byte-aligned store).
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %lane_ret = extractelement <16 x i8> %x, i32 1
  %lane_mem = extractelement <16 x i8> %x, i32 3
  store i8 %lane_mem, i8* %dst, align 1
  ret i8 %lane_ret
}
299
; Inserts two scalars into a <2 x i64>: the value loaded from %ptr goes into
; lane 1 and the register argument %y goes into lane 0.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  ; The lane index must be 0 or 1 for a two-element vector. The previous
  ; index (3) was out of range, which makes the result poison and lets the
  ; whole function fold away instead of exercising the insert.
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}
306
; Inserts the value loaded from %ptr into lane 1 and %y into lane 3 of %x.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %from_mem = load i32, i32* %ptr
  %with_mem = insertelement <4 x i32> %x, i32 %from_mem, i32 1
  %with_reg = insertelement <4 x i32> %with_mem, i32 %y, i32 3
  ret <4 x i32> %with_reg
}
313
; Inserts the value loaded from %ptr into lane 1 and %y into lane 5 of %x.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %from_mem = load i16, i16* %ptr
  %with_mem = insertelement <8 x i16> %x, i16 %from_mem, i32 1
  %with_reg = insertelement <8 x i16> %with_mem, i16 %y, i32 5
  ret <8 x i16> %with_reg
}
320
; Inserts the value loaded from %ptr into lane 3 and %y into lane 10 of %x.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %from_mem = load i8, i8* %ptr
  %with_mem = insertelement <16 x i8> %x, i8 %from_mem, i32 3
  %with_reg = insertelement <16 x i8> %with_mem, i8 %y, i32 10
  ret <16 x i8> %with_reg
}
327
; Two-input i32 shuffle producing <a[0], b[0], b[1], a[1]>.
define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %res
}
332
; Two-input i32 shuffle producing <a[0], a[1], b[0], a[2]>.
define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %res
}
337
; Repeats bytes 0 and 1 of %a across all sixteen lanes; %b is never selected.
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %res
}
342
; Replicates lane 0 of %a into all sixteen i16 lanes; %b is never selected.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %res = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %res
}
347
; vmovshdup 256-bit test: every odd-indexed float of %a is duplicated into the
; even lane just below it. %b is never selected.
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %res
}
353
; vmovshdup 128-bit test: every odd-indexed float of %a is duplicated into the
; even lane just below it. %b is never selected.
define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
  %res = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %res
}
359
; vmovsldup 256-bit test: every even-indexed float of %a is duplicated into the
; odd lane just above it. %b is never selected.
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
  %res = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %res
}
365
; vmovsldup 128-bit test: every even-indexed float of %a is duplicated into the
; odd lane just above it. %b is never selected.
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
  %res = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %res
}
371
; Replaces the low double of %b with a value loaded from %ptr:
; result is <*%ptr, b[1]>.
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %scalar = load double, double* %ptr
  %in_lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %res = shufflevector <2 x double> %in_lane0, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}
378
; Places a value loaded from %ptr in the high lane next to the low double of
; %b: result is <b[0], *%ptr>.
define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %scalar = load double, double* %ptr
  %in_lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %res = shufflevector <2 x double> %in_lane0, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %res
}
385
; Doubles %x (x + x), then stores the low two floats of the result to %p as a
; single i64.
define void @store_floats(<4 x float> %x, i64* %p) {
  %doubled = fadd <4 x float> %x, %x
  %lo_pair = shufflevector <4 x float> %doubled, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %bits = bitcast <2 x float> %lo_pair to i64
  store i64 %bits, i64* %p
  ret void
}
393
; Doubles %x (x + x), then stores the bit pattern of lane 0 to %p as an i64.
define void @store_double(<2 x double> %x, i64* %p) {
  %doubled = fadd <2 x double> %x, %x
  %lo = extractelement <2 x double> %doubled, i32 0
  %bits = bitcast double %lo to i64
  store i64 %bits, i64* %p
  ret void
}
401
; Doubles %x (x + x), then stores the bit pattern of lane 1 to %p as an i64.
define void @store_h_double(<2 x double> %x, i64* %p) {
  %doubled = fadd <2 x double> %x, %x
  %hi = extractelement <2 x double> %doubled, i32 1
  %bits = bitcast double %hi to i64
  store i64 %bits, i64* %p
  ret void
}
409
; Loads one double from %ptr and replicates it into both lanes.
define <2 x double> @test39(double* %ptr) nounwind {
  %scalar = load double, double* %ptr
  %in_lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = shufflevector <2 x double> %in_lane0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %splat
}
416
; Loads a full <2 x double> from %ptr and replicates its lane 0 into both lanes.
define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %loaded = load <2 x double>, <2 x double>* %ptr
  %splat = shufflevector <2 x double> %loaded, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %splat
}
422
; Replicates lane 0 of %a into both double lanes; %b is never selected.
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %res = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %res
}
427
; Duplicates the even-indexed doubles of %a: <a[0], a[0], a[2], a[2]>.
; %b is never selected.
define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %res = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %res
}
432
; Per-lane arithmetic right shift of <8 x i32> %a by the amounts in %b.
define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %res = ashr <8 x i32> %a, %b
  ret <8 x i32> %res
}
437
; Per-lane logical right shift of <8 x i32> %a by the amounts in %b.
define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %res = lshr <8 x i32> %a, %b
  ret <8 x i32> %res
}
442
; Per-lane left shift of <8 x i32> %a by the amounts in %b.
define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %res = shl <8 x i32> %a, %b
  ret <8 x i32> %res
}
447
; Arithmetic right shift of every i32 lane of %a by the constant 3.
define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {
  %res = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %res
}
452
; Logical right shift of every i32 lane of %a by the constant 3.
define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {
  %res = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %res
}
457
; Left shift of every i32 lane of %a by the constant 3.
define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {
  %res = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %res
}
462
; Per-lane arithmetic right shift of <4 x i64> %a by the amounts in %b.
define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %res = ashr <4 x i64> %a, %b
  ret <4 x i64> %res
}
467
; Per-lane logical right shift of <4 x i64> %a by the amounts in %b.
define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %res = lshr <4 x i64> %a, %b
  ret <4 x i64> %res
}
472
; Per-lane left shift of <4 x i64> %a by the amounts in %b.
define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %res = shl <4 x i64> %a, %b
  ret <4 x i64> %res
}
477
; Arithmetic right shift of every i64 lane of %a by the constant 3.
define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {
  %res = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %res
}
482
; Logical right shift of every i64 lane of %a by the constant 3.
define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {
  %res = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %res
}
487
; Left shift of every i64 lane of %a by the constant 3.
define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {
  %res = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %res
}
492
; Per-lane arithmetic right shift of <16 x i16> %a by the amounts in %b.
define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %res = ashr <16 x i16> %a, %b
  ret <16 x i16> %res
}
497
; Per-lane logical right shift of <16 x i16> %a by the amounts in %b.
define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %res = lshr <16 x i16> %a, %b
  ret <16 x i16> %res
}
502
; Per-lane left shift of <16 x i16> %a by the amounts in %b.
define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %res = shl <16 x i16> %a, %b
  ret <16 x i16> %res
}
507
; Arithmetic right shift of every i16 lane of %a by the constant 3.
define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {
  %res = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %res
}
512
; Logical right shift of every i16 lane of %a by the constant 3.
define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {
  %res = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %res
}
517
; Left shift of every i16 lane of %a by the constant 3.
define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
  %res = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %res
}
522
; Per-lane arithmetic right shift of <4 x i32> %a by the amounts in %b.
define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %res = ashr <4 x i32> %a, %b
  ret <4 x i32> %res
}
527
; Left shift of every i32 lane of %a by the constant 3.
define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
  %res = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %res
}
532
; Per-lane arithmetic right shift of <2 x i64> %a by the amounts in %b.
define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %res = ashr <2 x i64> %a, %b
  ret <2 x i64> %res
}
537
; Left shift of every i64 lane of %a by the constant 3.
define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {
  %res = shl <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %res
}
542
; Per-lane arithmetic right shift of <8 x i16> %a by the amounts in %b.
define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %res = ashr <8 x i16> %a, %b
  ret <8 x i16> %res
}
547
; Per-lane logical right shift of <8 x i16> %a by the amounts in %b.
define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %res = lshr <8 x i16> %a, %b
  ret <8 x i16> %res
}
552
; Per-lane left shift of <8 x i16> %a by the amounts in %b.
define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %res = shl <8 x i16> %a, %b
  ret <8 x i16> %res
}
557
; Arithmetic right shift of every i16 lane of %a by the constant 3.
define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {
  %res = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %res
}
562
; Logical right shift of every i16 lane of %a by the constant 3.
define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {
  %res = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %res
}
567
; Left shift of every i16 lane of %a by the constant 3.
define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
  %res = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %res
}
572
; Zero-extends the low eight bytes of %A to eight i16 lanes.
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
entry:
  ; Keep only bytes 0-7 of the input.
  %lo8 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %wide = zext <8 x i8> %lo8 to <8 x i16>
  ret <8 x i16> %wide
}
579
; Splats the scalar i8 %a across all 32 lanes.
define <32 x i8> @_broadcast32xi8(i8 %a) {
  %seed = insertelement <32 x i8> undef, i8 %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <32 x i8> %seed, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %splat
}
585
; Splats the scalar i8 %a across all 16 lanes.
define <16 x i8> @_broadcast16xi8(i8 %a) {
  %seed = insertelement <16 x i8> undef, i8 %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <16 x i8> %seed, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
591
; Splats the scalar i16 %a across all 16 lanes.
define <16 x i16> @_broadcast16xi16(i16 %a) {
  %seed = insertelement <16 x i16> undef, i16 %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <16 x i16> %seed, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %splat
}
597
; Splats the scalar i16 %a across all 8 lanes.
define <8 x i16> @_broadcast8xi16(i16 %a) {
  %seed = insertelement <8 x i16> undef, i16 %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <8 x i16> %seed, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
603
; Splats the scalar i32 %a across all 8 lanes.
define <8 x i32> @_broadcast8xi32(i32 %a) {
  %seed = insertelement <8 x i32> undef, i32 %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <8 x i32> %seed, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %splat
}
609
; Splats the scalar i32 %a across all 4 lanes.
define <4 x i32> @_broadcast4xi32(i32 %a) {
  %seed = insertelement <4 x i32> undef, i32 %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
615
; Splats the scalar i64 %a across all 4 lanes.
define <4 x i64> @_broadcast4xi64(i64 %a) {
  ; The lane index is given as an i64 here.
  %seed = insertelement <4 x i64> undef, i64 %a, i64 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <4 x i64> %seed, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %splat
}
621
; Splats the scalar i64 %a across both lanes.
define <2 x i64> @_broadcast2xi64(i64 %a) {
  ; The lane index is given as an i64 here.
  %seed = insertelement <2 x i64> undef, i64 %a, i64 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <2 x i64> %seed, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
627
; Splats the scalar float %a across all 8 lanes.
define <8 x float> @_broadcast8xfloat(float %a) {
  %seed = insertelement <8 x float> undef, float %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <8 x float> %seed, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}
633
; Splats the scalar float %a across all 4 lanes.
define <4 x float> @_broadcast4xfloat(float %a) {
  %seed = insertelement <4 x float> undef, float %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
639
; Splats the scalar double %a across all 4 lanes.
define <4 x double> @_broadcast4xdouble(double %a) {
  %seed = insertelement <4 x double> undef, double %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <4 x double> %seed, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %splat
}
645
; Splats the scalar double %a across both lanes.
define <2 x double> @_broadcast2xdouble(double %a) {
  %seed = insertelement <2 x double> undef, double %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <2 x double> %seed, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
651
; Computes (%a0 * %a1) - %a2 per float lane with a separate fmul and fsub.
; NOTE(review): neither instruction carries fast-math flags, so whether the
; pair is fused into a single FMA depends on llc's fp-contract setting - confirm.
define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  %prod = fmul <4 x float> %a0, %a1
  %diff = fsub <4 x float> %prod, %a2
  ret <4 x float> %diff
}
657
; Arithmetic right shift of every i8 lane by 7, which leaves each byte filled
; with copies of its sign bit.
; Should generate the following code:
;   vpxor    %ymm1, %ymm1, %ymm1
;   vpcmpgtb %ymm0, %ymm1, %ymm0
define <32 x i8> @test_cmpgtb(<32 x i8> %A) {
  %sign_fill = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %sign_fill
}
665
; Splats the scalar float %a (passed as an argument) across all 4 lanes.
define <4 x float> @_inreg4xfloat(float %a) {
  %seed = insertelement <4 x float> undef, float %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
671
; Splats the scalar float %a (passed as an argument) across all 8 lanes.
define <8 x float> @_inreg8xfloat(float %a) {
  %seed = insertelement <8 x float> undef, float %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <8 x float> %seed, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %splat
}
677
; Splats the scalar double %a (passed as an argument) across all 4 lanes.
define <4 x double> @_inreg4xdouble(double %a) {
  %seed = insertelement <4 x double> undef, double %a, i32 0
  ; An all-zero mask selects lane 0 for every result lane.
  %splat = shufflevector <4 x double> %seed, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %splat
}
683