1; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
2
; llvm.x86.aesni.aesdec on two register arguments: expect one vaesdec, then return.
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %dec = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %dec
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
12
13
; llvm.x86.aesni.aesdeclast on two register arguments: expect one vaesdeclast.
define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %declast = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %declast
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
23
24
; llvm.x86.aesni.aesenc on two register arguments: expect one vaesenc.
define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %enc = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %enc
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
34
35
; llvm.x86.aesni.aesenclast on two register arguments: expect one vaesenclast.
define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %enclast = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %enclast
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
45
46
; llvm.x86.aesni.aesimc on a single register argument: expect one vaesimc.
define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaesimc %xmm0, %xmm0
; CHECK-NEXT:    retl
  %imc = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %imc
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
56
57
; llvm.x86.aesni.aeskeygenassist with constant i8 7: the constant must appear as
; the $7 immediate of vaeskeygenassist.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %keygen = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %keygen
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
67
68
; llvm.x86.sse2.add.sd under AVX: expect the three-operand vaddsd form.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sum = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %sum
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
78
79
; llvm.x86.sse2.cmp.pd with predicate immediate 7: expected to print as the
; vcmpordpd mnemonic rather than a raw immediate.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %mask = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %mask
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
89
90
; llvm.x86.sse2.cmp.sd with predicate immediate 7: expected to print as the
; vcmpordsd mnemonic rather than a raw immediate.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %mask = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %mask
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
100
101
; llvm.x86.sse2.comieq.sd: expect vcomisd, then sete widened into %eax.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %is_eq = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %is_eq
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
113
114
; llvm.x86.sse2.comige.sd: expect vcomisd, then setae widened into %eax.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %is_ge = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %is_ge
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
126
127
; llvm.x86.sse2.comigt.sd: expect vcomisd, then seta widened into %eax.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %is_gt = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %is_gt
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
139
140
; llvm.x86.sse2.comile.sd: expect vcomisd, then setbe widened into %eax.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %is_le = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %is_le
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
152
153
; llvm.x86.sse2.comilt.sd: unlike the sibling comi* tests, the expected sequence
; after vcomisd is sbbl + andl $1 instead of setcc + movzbl.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %is_lt = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %is_lt
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
165
166
; llvm.x86.sse2.comineq.sd: expect vcomisd, then setne widened into %eax.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %is_ne = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %is_ne
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
178
179
; llvm.x86.sse2.cvtdq2pd (<4 x i32> in, <2 x double> out): expect one vcvtdq2pd.
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %cvt
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
189
190
; llvm.x86.sse2.cvtdq2ps (<4 x i32> in, <4 x float> out): expect one vcvtdq2ps.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %cvt
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
200
201
; llvm.x86.sse2.cvtpd2dq (<2 x double> in, <4 x i32> out): expect one vcvtpd2dq.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %cvt
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
211
212
; llvm.x86.sse2.cvtpd2ps (<2 x double> in, <4 x float> out): expect one vcvtpd2ps.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %cvt
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
222
223
; llvm.x86.sse2.cvtps2dq (<4 x float> in, <4 x i32> out): expect one vcvtps2dq.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %cvt
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
233
234
; llvm.x86.sse2.cvtps2pd (<4 x float> in, <2 x double> out): expect one vcvtps2pd.
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %cvt
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
244
245
; llvm.x86.sse2.cvtsd2si: the i32 result is expected directly in %eax via vcvtsd2si.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %eax
; CHECK-NEXT:    retl
  %int = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %int
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
255
256
; llvm.x86.sse2.cvtsd2ss with a <4 x float> and a <2 x double> argument:
; expect the three-operand vcvtsd2ss.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %cvt
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
266
267
; llvm.x86.sse2.cvtsi2sd with constant i32 7: the constant is expected to be
; materialised in %eax first and then fed to vcvtsi2sdl.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %cvt
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
278
279
; llvm.x86.sse2.cvtss2sd with a <2 x double> and a <4 x float> argument:
; expect the three-operand vcvtss2sd.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %cvt
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
289
290
; llvm.x86.sse2.cvttpd2dq (<2 x double> in, <4 x i32> out): expect one vcvttpd2dq.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %cvt
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
300
301
; llvm.x86.sse2.cvttps2dq (<4 x float> in, <4 x i32> out): expect one vcvttps2dq.
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %cvt = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %cvt
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
311
312
; llvm.x86.sse2.cvttsd2si: the i32 result is expected directly in %eax via vcvttsd2si.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %eax
; CHECK-NEXT:    retl
  %int = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %int
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
322
323
; llvm.x86.sse2.div.sd under AVX: expect the three-operand vdivsd form.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %quot = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %quot
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
333
334
335
; llvm.x86.sse2.max.pd: expect one vmaxpd on the two argument registers.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %max
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
345
346
; llvm.x86.sse2.max.sd: expect one vmaxsd on the two argument registers.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %max
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
356
357
; llvm.x86.sse2.min.pd: expect one vminpd on the two argument registers.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %min
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
367
368
; llvm.x86.sse2.min.sd: expect one vminsd on the two argument registers.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %min
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
378
379
; llvm.x86.sse2.movmsk.pd: the i32 mask is expected directly in %eax via vmovmskpd.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovmskpd %xmm0, %eax
; CHECK-NEXT:    retl
  %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %mask
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
389
390
391
392
; llvm.x86.sse2.mul.sd under AVX: expect the three-operand vmulsd form.
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %prod = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %prod
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
402
403
; llvm.x86.sse2.packssdw.128 (two <4 x i32> in, <8 x i16> out): expect one vpackssdw.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %packed = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %packed
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
413
414
; llvm.x86.sse2.packsswb.128 (two <8 x i16> in, <16 x i8> out): expect one vpacksswb.
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %packed = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %packed
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
424
425
; llvm.x86.sse2.packuswb.128 (two <8 x i16> in, <16 x i8> out): expect one vpackuswb.
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %packed = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %packed
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
435
436
; llvm.x86.sse2.padds.b on <16 x i8>: expect one vpaddsb.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sum = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %sum
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
446
447
; llvm.x86.sse2.padds.w on <8 x i16>: expect one vpaddsw.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sum = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sum
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
457
458
; llvm.x86.sse2.paddus.b on <16 x i8>: expect one vpaddusb.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sum = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %sum
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
468
469
; llvm.x86.sse2.paddus.w on <8 x i16>: expect one vpaddusw.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sum = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sum
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
479
480
; llvm.x86.sse2.pavg.b on <16 x i8>: expect one vpavgb.
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %avg = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %avg
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
490
491
; llvm.x86.sse2.pavg.w on <8 x i16>: expect one vpavgw.
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %avg = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %avg
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
501
502
; llvm.x86.sse2.pmadd.wd (two <8 x i16> in, <4 x i32> out): expect one vpmaddwd.
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %madd = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %madd
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
512
513
; llvm.x86.sse2.pmaxs.w on <8 x i16>: expect one vpmaxsw.
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %max
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
523
524
; llvm.x86.sse2.pmaxu.b on <16 x i8>: expect one vpmaxub.
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %max
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
534
535
; llvm.x86.sse2.pmins.w on <8 x i16>: expect one vpminsw.
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %min
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
545
546
; llvm.x86.sse2.pminu.b on <16 x i8>: expect one vpminub.
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %min
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
556
557
; llvm.x86.sse2.pmovmskb.128: the i32 mask is expected directly in %eax via vpmovmskb.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovmskb %xmm0, %eax
; CHECK-NEXT:    retl
  %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %mask
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
567
568
; llvm.x86.sse2.pmulh.w on <8 x i16>: expect one vpmulhw.
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %hi = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %hi
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
578
579
; llvm.x86.sse2.pmulhu.w on <8 x i16>: expect one vpmulhuw.
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %hi = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %hi
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
589
590
; llvm.x86.sse2.pmulu.dq (two <4 x i32> in, <2 x i64> out): expect one vpmuludq.
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %prod = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %prod
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
600
601
; llvm.x86.sse2.psad.bw (two <16 x i8> in, <2 x i64> out): expect one vpsadbw.
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sad = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %sad
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
611
612
; llvm.x86.sse2.psll.d with a vector shift count: expect the register form of vpslld.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %shl = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %shl
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
622
623
; llvm.x86.sse2.psll.q with a vector shift count: expect the register form of vpsllq.
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %shl = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %shl
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
633
634
; llvm.x86.sse2.psll.w with a vector shift count: expect the register form of vpsllw.
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %shl = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %shl
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
644
645
; llvm.x86.sse2.pslli.d with constant count 7: expect the immediate form vpslld $7.
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %shl = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %shl
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
655
656
; llvm.x86.sse2.pslli.q with constant count 7: expect the immediate form vpsllq $7.
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %shl = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %shl
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
666
667
; llvm.x86.sse2.pslli.w with constant count 7: expect the immediate form vpsllw $7.
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %shl = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %shl
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
677
678
; llvm.x86.sse2.psra.d with a vector shift count: expect the register form of vpsrad.
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sra = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %sra
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
688
689
; llvm.x86.sse2.psra.w with a vector shift count: expect the register form of vpsraw.
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sra = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %sra
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
699
700
; llvm.x86.sse2.psrai.d with constant count 7: expect the immediate form vpsrad $7.
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sra = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %sra
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
710
711
; llvm.x86.sse2.psrai.w with constant count 7: expect the immediate form vpsraw $7.
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sra = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %sra
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
721
722
; llvm.x86.sse2.psrl.d with a vector shift count: expect the register form of vpsrld.
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %srl = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %srl
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
732
733
; llvm.x86.sse2.psrl.q with a vector shift count: expect the register form of vpsrlq.
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %srl = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %srl
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
743
744
; llvm.x86.sse2.psrl.w with a vector shift count: expect the register form of vpsrlw.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %srl = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %srl
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
754
755
; llvm.x86.sse2.psrli.d with constant count 7: expect the immediate form vpsrld $7.
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %srl = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %srl
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
765
766
; llvm.x86.sse2.psrli.q with constant count 7: expect the immediate form vpsrlq $7.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %srl = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %srl
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
776
777
; llvm.x86.sse2.psrli.w with constant count 7: expect the immediate form vpsrlw $7.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %srl = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %srl
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
787
788
; llvm.x86.sse2.psubs.b on <16 x i8>: expect one vpsubsb.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %diff = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %diff
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
798
799
; llvm.x86.sse2.psubs.w on <8 x i16>: expect one vpsubsw.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %diff = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %diff
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
809
810
; llvm.x86.sse2.psubus.b on <16 x i8>: expect one vpsubusb.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %diff = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %diff
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
820
821
; Verifies that @llvm.x86.sse2.psubus.w is emitted as vpsubusw.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %diff = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %diff
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
831
832
; Verifies that @llvm.x86.sse2.sqrt.pd is emitted as vsqrtpd.
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %root = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %root
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
842
843
; Verifies that @llvm.x86.sse2.sqrt.sd is emitted as the three-operand
; vsqrtsd with %xmm0 used for both sources.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %root = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %root
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
853
854
; Verifies that @llvm.x86.sse2.storel.dq is emitted as a vmovq store through
; the pointer argument, which is first loaded from the stack into %eax.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovq %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
865
866
; Verifies that @llvm.x86.sse2.storeu.dq is emitted as a vmovdqu store.
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
  ; The constant-pool operand of the add is matched with a pattern instead of
  ; a literal label: the label's numeric suffix is assigned by the compiler
  ; and shifts whenever functions are added or removed earlier in this file.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpaddb {{LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
; CHECK-NEXT:    retl
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
880
881
; Verifies that @llvm.x86.sse2.storeu.pd is emitted as a vmovupd store.
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovupd %xmm0, (%eax)
; CHECK-NEXT:    retl
  %sum = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %sum)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
897
898
; Verifies that @llvm.x86.sse2.sub.sd is emitted as vsubsd.
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %diff = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %diff
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
908
909
; Verifies that @llvm.x86.sse2.ucomieq.sd is emitted as vucomisd, with the
; i32 result produced by sete + movzbl.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
921
922
; Verifies that @llvm.x86.sse2.ucomige.sd is emitted as vucomisd, with the
; i32 result produced by setae + movzbl.
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
934
935
; Verifies that @llvm.x86.sse2.ucomigt.sd is emitted as vucomisd, with the
; i32 result produced by seta + movzbl.
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
947
948
; Verifies that @llvm.x86.sse2.ucomile.sd is emitted as vucomisd, with the
; i32 result produced by setbe + movzbl.
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
960
961
; Verifies that @llvm.x86.sse2.ucomilt.sd is emitted as vucomisd, with the
; i32 result produced by the sbbl/andl $1 sequence.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
973
974
; Verifies that @llvm.x86.sse2.ucomineq.sd is emitted as vucomisd, with the
; i32 result produced by setne + movzbl.
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
986
987
; Verifies that @llvm.x86.sse3.addsub.pd is emitted as vaddsubpd.
define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %addsub = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %addsub
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
997
998
; Verifies that @llvm.x86.sse3.addsub.ps is emitted as vaddsubps.
define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %addsub = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %addsub
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
1008
1009
; Verifies that @llvm.x86.sse3.hadd.pd is emitted as vhaddpd.
define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %hsum = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %hsum
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
1019
1020
; Verifies that @llvm.x86.sse3.hadd.ps is emitted as vhaddps.
define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %hsum = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %hsum
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
1030
1031
; Verifies that @llvm.x86.sse3.hsub.pd is emitted as vhsubpd.
define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hsub_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %hdiff = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %hdiff
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
1041
1042
; Verifies that @llvm.x86.sse3.hsub.ps is emitted as vhsubps.
define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_hsub_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %hdiff = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %hdiff
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
1052
1053
; Verifies that @llvm.x86.sse3.ldu.dq is emitted as vlddqu from the pointer
; argument, which is first loaded from the stack into %eax.
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
; CHECK-LABEL: test_x86_sse3_ldu_dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vlddqu (%eax), %xmm0
; CHECK-NEXT:    retl
  %loaded = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
  ret <16 x i8> %loaded
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
1064
1065
; Verifies that @llvm.x86.sse41.blendvpd is emitted as the four-operand
; vblendvpd with the third argument (%xmm2) as the first listed operand.
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %blend = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %blend
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
1075
1076
; Verifies that @llvm.x86.sse41.blendvps is emitted as the four-operand
; vblendvps with the third argument (%xmm2) as the first listed operand.
define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %blend
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
1086
1087
; Verifies that @llvm.x86.sse41.dppd with control byte 7 is emitted as
; vdppd $7.
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_dppd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %dot = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %dot
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
1097
1098
; Verifies that @llvm.x86.sse41.dpps with control byte 7 is emitted as
; vdpps $7.
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_dpps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %dot = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %dot
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
1108
1109
; Verifies that @llvm.x86.sse41.insertps with control byte 7 is emitted as a
; vinsertps whose printed shuffle is zero,zero,zero,xmm0[3].
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_insertps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
; CHECK-NEXT:    retl
  %inserted = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %inserted
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
1119
1120
1121
; Verifies that @llvm.x86.sse41.mpsadbw with control byte 7 is emitted as
; vmpsadbw $7.
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_mpsadbw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %sad = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <8 x i16> %sad
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
1131
1132
; Verifies that @llvm.x86.sse41.packusdw is emitted as vpackusdw.
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_packusdw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %packed = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %packed
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
1142
1143
; Verifies that @llvm.x86.sse41.pblendvb is emitted as the four-operand
; vpblendvb with the third argument (%xmm2) as the first listed operand.
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse41_pblendvb:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %blend = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
  ret <16 x i8> %blend
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
1153
1154
; Verifies that @llvm.x86.sse41.phminposuw is emitted as vphminposuw.
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_phminposuw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphminposuw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %minpos = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
  ret <8 x i16> %minpos
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
1164
1165
; Verifies that @llvm.x86.sse41.pmaxsb is emitted as vpmaxsb.
define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsb:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %max
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
1175
1176
; Verifies that @llvm.x86.sse41.pmaxsd is emitted as vpmaxsd.
define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %max
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
1186
1187
; Verifies that @llvm.x86.sse41.pmaxud is emitted as vpmaxud.
define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxud:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %max
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
1197
1198
; Verifies that @llvm.x86.sse41.pmaxuw is emitted as vpmaxuw.
define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxuw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %max = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %max
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
1208
1209
; Verifies that @llvm.x86.sse41.pminsb is emitted as vpminsb.
define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsb:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %min
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
1219
1220
; Verifies that @llvm.x86.sse41.pminsd is emitted as vpminsd.
define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %min
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
1230
1231
; Verifies that @llvm.x86.sse41.pminud is emitted as vpminud.
define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminud:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %min
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
1241
1242
; Verifies that @llvm.x86.sse41.pminuw is emitted as vpminuw.
define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pminuw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %min = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %min
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
1252
1253
; Verifies that @llvm.x86.sse41.pmovsxbd is emitted as vpmovsxbd.
define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %ext = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
  ret <4 x i32> %ext
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
1263
1264
; Verifies that @llvm.x86.sse41.pmovsxbq is emitted as vpmovsxbq.
define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %ext = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
  ret <2 x i64> %ext
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
1274
1275
; Verifies that @llvm.x86.sse41.pmovsxbw is emitted as vpmovsxbw.
define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxbw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %ext = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
  ret <8 x i16> %ext
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
1285
1286
; Verifies that @llvm.x86.sse41.pmovsxdq is emitted as vpmovsxdq.
define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxdq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %ext = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
  ret <2 x i64> %ext
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
1296
1297
; Verifies that @llvm.x86.sse41.pmovsxwd is emitted as vpmovsxwd.
define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxwd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %ext = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
  ret <4 x i32> %ext
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
1307
1308
; Verifies that @llvm.x86.sse41.pmovsxwq is emitted as vpmovsxwq.
define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovsxwq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %ext = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
  ret <2 x i64> %ext
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
1318
1319
; Verifies that @llvm.x86.sse41.pmovzxbd is emitted as vpmovzxbd, including
; the printed shuffle comment with zero-filled lanes.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    retl
  %ext = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
  ret <4 x i32> %ext
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
1329
1330
; Verifies that @llvm.x86.sse41.pmovzxbq is emitted as vpmovzxbq, including
; the printed shuffle comment with zero-filled lanes.
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retl
  %ext = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
  ret <2 x i64> %ext
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
1340
1341
; Verifies that @llvm.x86.sse41.pmovzxbw is emitted as vpmovzxbw, including
; the printed shuffle comment with zero-filled lanes.
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    retl
  %ext = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
  ret <8 x i16> %ext
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
1351
1352
; Verifies that @llvm.x86.sse41.pmovzxdq is emitted as vpmovzxdq, including
; the printed shuffle comment with zero-filled lanes.
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    retl
  %ext = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
  ret <2 x i64> %ext
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
1362
1363
; Verifies that @llvm.x86.sse41.pmovzxwd is emitted as vpmovzxwd, including
; the printed shuffle comment with zero-filled lanes.
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    retl
  %ext = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
  ret <4 x i32> %ext
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
1373
1374
; Verifies that @llvm.x86.sse41.pmovzxwq is emitted as vpmovzxwq, including
; the printed shuffle comment with zero-filled lanes.
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT:    retl
  %ext = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
  ret <2 x i64> %ext
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
1384
1385
; Verifies that @llvm.x86.sse41.pmuldq is emitted as vpmuldq.
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmuldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %prod = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %prod
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
1395
1396
; Verifies that @llvm.x86.sse41.ptestc is emitted as vptest, with the i32
; result produced by the sbbl/andl $1 sequence.
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestc:
; CHECK:       # BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
1408
1409
; Verifies that @llvm.x86.sse41.ptestnzc is emitted as vptest, with the i32
; result produced by seta + movzbl.
define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestnzc:
; CHECK:       # BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
1421
1422
; Verifies that @llvm.x86.sse41.ptestz is emitted as vptest, with the i32
; result produced by sete + movzbl.
define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestz:
; CHECK:       # BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
1434
1435
; Verifies that @llvm.x86.sse41.round.pd with control value 7 is emitted as
; vroundpd $7.
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse41_round_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vroundpd $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %rounded = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %rounded
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
1445
1446
; Verifies that @llvm.x86.sse41.round.ps with control value 7 is emitted as
; vroundps $7.
define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse41_round_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vroundps $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %rounded
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
1456
1457
; Verifies that @llvm.x86.sse41.round.sd with control value 7 is emitted as
; vroundsd $7.
define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_round_sd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %rounded = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %rounded
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
1467
1468
; Verifies that @llvm.x86.sse41.round.ss with control value 7 is emitted as
; vroundss $7.
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_round_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %rounded = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %rounded
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
1478
1479
; Verifies that @llvm.x86.sse42.pcmpestri128 is emitted as vpcmpestri $7: the
; two i32 7 operands are materialized in %eax and %edx, and the result is
; copied out of %ecx.
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %index
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1492
1493
; Verifies @llvm.x86.sse42.pcmpestri128 with both vector operands loaded from
; memory: the first is loaded with vmovdqa and the second is used directly as
; the memory operand of vpcmpestri.
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovdqa (%eax), %xmm0
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, (%ecx), %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %lhs = load <16 x i8>, <16 x i8>* %a0
  %rhs = load <16 x i8>, <16 x i8>* %a2
  %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 7, <16 x i8> %rhs, i32 7, i8 7)
  ret i32 %index
}
1510
1511
; Verifies that @llvm.x86.sse42.pcmpestria128 is emitted as vpcmpestri $7,
; with the i32 result produced by seta + movzbl.
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestria128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %flag
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1525
1526
; Verifies that @llvm.x86.sse42.pcmpestric128 is emitted as vpcmpestri $7,
; with the i32 result produced by the sbbl/andl $1 sequence.
define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestric128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %flag
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1540
1541
; Verifies that @llvm.x86.sse42.pcmpestrio128 is emitted as vpcmpestri $7,
; with the i32 result produced by seto + movzbl.
define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    seto %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %flag
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1555
1556
; Verifies that @llvm.x86.sse42.pcmpestris128 is emitted as vpcmpestri $7,
; with the i32 result produced by sets + movzbl.
define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestris128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sets %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %flag
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1570
1571
; Verifies that @llvm.x86.sse42.pcmpestriz128 is emitted as vpcmpestri $7,
; with the i32 result produced by sete + movzbl.
define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %flag = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %flag
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1585
1586
; Verifies that @llvm.x86.sse42.pcmpestrm128 is emitted as vpcmpestrm $7 with
; the two i32 7 operands materialized in %eax and %edx.
define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
; CHECK-NEXT:    retl
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret <16 x i8> %mask
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
1598
1599
; Load-folding variant of the pcmpestrm128 test: the second vector operand is
; loaded from the pointer %a2. The expected output fetches the pointer from
; the stack into %ecx and uses it directly as the memory operand of
; vpcmpestrm, with no separate vector load instruction.
1600define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
1601; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
1602; CHECK:       # BB#0:
1603; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1604; CHECK-NEXT:    movl $7, %eax
1605; CHECK-NEXT:    movl $7, %edx
1606; CHECK-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
1607; CHECK-NEXT:    retl
1608  %1 = load <16 x i8>, <16 x i8>* %a2
1609  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
1610  ret <16 x i8> %res
1611}
1612
1613
; Codegen test for @llvm.x86.sse42.pcmpistri128 with immediate 7. Expected
; output: one register-register vpcmpistri, then a copy of %ecx into %eax
; before returning the i32 result.
1614define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
1615; CHECK-LABEL: test_x86_sse42_pcmpistri128:
1616; CHECK:       # BB#0:
1617; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1618; CHECK-NEXT:    movl %ecx, %eax
1619; CHECK-NEXT:    retl
1620  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1621  ret i32 %res
1622}
1623declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1624
1625
; Load variant of the pcmpistri128 test: both vector operands come from
; memory. The expected output reads the two pointers from the stack, loads
; one vector into %xmm0 with vmovdqa, and folds the other load into
; vpcmpistri as a memory operand; %ecx is then copied into %eax for return.
1626define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
1627; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
1628; CHECK:       # BB#0:
1629; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1630; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1631; CHECK-NEXT:    vmovdqa (%ecx), %xmm0
1632; CHECK-NEXT:    vpcmpistri $7, (%eax), %xmm0
1633; CHECK-NEXT:    movl %ecx, %eax
1634; CHECK-NEXT:    retl
1635  %1 = load <16 x i8>, <16 x i8>* %a0
1636  %2 = load <16 x i8>, <16 x i8>* %a1
1637  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
1638  ret i32 %res
1639}
1640
1641
; Codegen test for @llvm.x86.sse42.pcmpistria128 with immediate 7. Expected
; output: one vpcmpistri, then seta + movzbl to build the i32 return value.
1642define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
1643; CHECK-LABEL: test_x86_sse42_pcmpistria128:
1644; CHECK:       # BB#0:
1645; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1646; CHECK-NEXT:    seta %al
1647; CHECK-NEXT:    movzbl %al, %eax
1648; CHECK-NEXT:    retl
1649  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1650  ret i32 %res
1651}
1652declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1653
1654
; Codegen test for @llvm.x86.sse42.pcmpistric128 with immediate 7. Expected
; output: one vpcmpistri, then `sbbl %eax, %eax` and `andl $1, %eax` to build
; the i32 return value (instead of a setcc/movzbl pair).
1655define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
1656; CHECK-LABEL: test_x86_sse42_pcmpistric128:
1657; CHECK:       # BB#0:
1658; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1659; CHECK-NEXT:    sbbl %eax, %eax
1660; CHECK-NEXT:    andl $1, %eax
1661; CHECK-NEXT:    retl
1662  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1663  ret i32 %res
1664}
1665declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1666
1667
; Codegen test for @llvm.x86.sse42.pcmpistrio128 with immediate 7. Expected
; output: one vpcmpistri, then seto + movzbl to build the i32 return value.
1668define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
1669; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
1670; CHECK:       # BB#0:
1671; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1672; CHECK-NEXT:    seto %al
1673; CHECK-NEXT:    movzbl %al, %eax
1674; CHECK-NEXT:    retl
1675  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1676  ret i32 %res
1677}
1678declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1679
1680
; Codegen test for @llvm.x86.sse42.pcmpistris128 with immediate 7. Expected
; output: one vpcmpistri, then sets + movzbl to build the i32 return value.
1681define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
1682; CHECK-LABEL: test_x86_sse42_pcmpistris128:
1683; CHECK:       # BB#0:
1684; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1685; CHECK-NEXT:    sets %al
1686; CHECK-NEXT:    movzbl %al, %eax
1687; CHECK-NEXT:    retl
1688  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1689  ret i32 %res
1690}
1691declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1692
1693
; Codegen test for @llvm.x86.sse42.pcmpistriz128 with immediate 7. Expected
; output: one vpcmpistri, then sete + movzbl to build the i32 return value.
1694define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
1695; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
1696; CHECK:       # BB#0:
1697; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
1698; CHECK-NEXT:    sete %al
1699; CHECK-NEXT:    movzbl %al, %eax
1700; CHECK-NEXT:    retl
1701  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
1702  ret i32 %res
1703}
1704declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1705
1706
; Codegen test for @llvm.x86.sse42.pcmpistrm128 (vector-returning form) with
; immediate 7. Expected output: a single register-register vpcmpistrm.
1707define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
1708; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
1709; CHECK:       # BB#0:
1710; CHECK-NEXT:    vpcmpistrm $7, %xmm1, %xmm0
1711; CHECK-NEXT:    retl
1712  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
1713  ret <16 x i8> %res
1714}
1715declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
1716
1717
; Load-folding variant of the pcmpistrm128 test: the second vector operand is
; loaded from the pointer %a1. The expected output fetches the pointer from
; the stack into %eax and uses it directly as the memory operand of
; vpcmpistrm, with no separate vector load instruction.
1718define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
1719; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
1720; CHECK:       # BB#0:
1721; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1722; CHECK-NEXT:    vpcmpistrm $7, (%eax), %xmm0
1723; CHECK-NEXT:    retl
1724  %1 = load <16 x i8>, <16 x i8>* %a1
1725  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
1726  ret <16 x i8> %res
1727}
1728
1729
; Codegen test for @llvm.x86.sse.add.ss: the call on two <4 x float>
; arguments is expected to become a single vaddss of %xmm1 and %xmm0.
1730define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
1731; CHECK-LABEL: test_x86_sse_add_ss:
1732; CHECK:       # BB#0:
1733; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
1734; CHECK-NEXT:    retl
1735  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1736  ret <4 x float> %res
1737}
1738declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
1739
1740
; Codegen test for @llvm.x86.sse.cmp.ps with predicate immediate 7. The
; expected output prints that predicate with the vcmpordps mnemonic.
1741define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
1742; CHECK-LABEL: test_x86_sse_cmp_ps:
1743; CHECK:       # BB#0:
1744; CHECK-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
1745; CHECK-NEXT:    retl
1746  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1747  ret <4 x float> %res
1748}
1749declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
1750
1751
; Codegen test for @llvm.x86.sse.cmp.ss with predicate immediate 7. The
; expected output prints that predicate with the vcmpordss mnemonic.
1752define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
1753; CHECK-LABEL: test_x86_sse_cmp_ss:
1754; CHECK:       # BB#0:
1755; CHECK-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
1756; CHECK-NEXT:    retl
1757  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
1758  ret <4 x float> %res
1759}
1760declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
1761
1762
; Codegen test for @llvm.x86.sse.comieq.ss. Expected output: vcomiss on
; %xmm1/%xmm0, then sete + movzbl to build the i32 return value.
1763define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
1764; CHECK-LABEL: test_x86_sse_comieq_ss:
1765; CHECK:       # BB#0:
1766; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1767; CHECK-NEXT:    sete %al
1768; CHECK-NEXT:    movzbl %al, %eax
1769; CHECK-NEXT:    retl
1770  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1771  ret i32 %res
1772}
1773declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
1774
1775
; Codegen test for @llvm.x86.sse.comige.ss. Expected output: vcomiss on
; %xmm1/%xmm0, then setae + movzbl to build the i32 return value.
1776define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
1777; CHECK-LABEL: test_x86_sse_comige_ss:
1778; CHECK:       # BB#0:
1779; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1780; CHECK-NEXT:    setae %al
1781; CHECK-NEXT:    movzbl %al, %eax
1782; CHECK-NEXT:    retl
1783  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1784  ret i32 %res
1785}
1786declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
1787
1788
; Codegen test for @llvm.x86.sse.comigt.ss. Expected output: vcomiss on
; %xmm1/%xmm0, then seta + movzbl to build the i32 return value.
1789define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
1790; CHECK-LABEL: test_x86_sse_comigt_ss:
1791; CHECK:       # BB#0:
1792; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1793; CHECK-NEXT:    seta %al
1794; CHECK-NEXT:    movzbl %al, %eax
1795; CHECK-NEXT:    retl
1796  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1797  ret i32 %res
1798}
1799declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
1800
1801
; Codegen test for @llvm.x86.sse.comile.ss. Expected output: vcomiss on
; %xmm1/%xmm0, then setbe + movzbl to build the i32 return value.
1802define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
1803; CHECK-LABEL: test_x86_sse_comile_ss:
1804; CHECK:       # BB#0:
1805; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1806; CHECK-NEXT:    setbe %al
1807; CHECK-NEXT:    movzbl %al, %eax
1808; CHECK-NEXT:    retl
1809  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1810  ret i32 %res
1811}
1812declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
1813
1814
; Codegen test for @llvm.x86.sse.comilt.ss. Expected output: vcomiss on
; %xmm1/%xmm0, then `sbbl %eax, %eax` and `andl $1, %eax` to build the i32
; return value (instead of a setcc/movzbl pair).
1815define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
1816; CHECK-LABEL: test_x86_sse_comilt_ss:
1817; CHECK:       # BB#0:
1818; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1819; CHECK-NEXT:    sbbl %eax, %eax
1820; CHECK-NEXT:    andl $1, %eax
1821; CHECK-NEXT:    retl
1822  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1823  ret i32 %res
1824}
1825declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
1826
1827
; Codegen test for @llvm.x86.sse.comineq.ss. Expected output: vcomiss on
; %xmm1/%xmm0, then setne + movzbl to build the i32 return value.
1828define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
1829; CHECK-LABEL: test_x86_sse_comineq_ss:
1830; CHECK:       # BB#0:
1831; CHECK-NEXT:    vcomiss %xmm1, %xmm0
1832; CHECK-NEXT:    setne %al
1833; CHECK-NEXT:    movzbl %al, %eax
1834; CHECK-NEXT:    retl
1835  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
1836  ret i32 %res
1837}
1838declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
1839
1840
; Codegen test for @llvm.x86.sse.cvtsi2ss with the constant i32 7. Expected
; output: the constant is first moved into %eax, then converted with
; vcvtsi2ssl into %xmm0.
1841define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
1842; CHECK-LABEL: test_x86_sse_cvtsi2ss:
1843; CHECK:       # BB#0:
1844; CHECK-NEXT:    movl $7, %eax
1845; CHECK-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
1846; CHECK-NEXT:    retl
1847  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
1848  ret <4 x float> %res
1849}
1850declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
1851
1852
; Codegen test for @llvm.x86.sse.cvtss2si. Expected output: a single
; vcvtss2si from %xmm0 into %eax.
1853define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
1854; CHECK-LABEL: test_x86_sse_cvtss2si:
1855; CHECK:       # BB#0:
1856; CHECK-NEXT:    vcvtss2si %xmm0, %eax
1857; CHECK-NEXT:    retl
1858  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
1859  ret i32 %res
1860}
1861declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
1862
1863
; Codegen test for @llvm.x86.sse.cvttss2si. Expected output: a single
; vcvttss2si from %xmm0 into %eax.
1864define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
1865; CHECK-LABEL: test_x86_sse_cvttss2si:
1866; CHECK:       # BB#0:
1867; CHECK-NEXT:    vcvttss2si %xmm0, %eax
1868; CHECK-NEXT:    retl
1869  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
1870  ret i32 %res
1871}
1872declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
1873
1874
; Codegen test for @llvm.x86.sse.div.ss: the call on two <4 x float>
; arguments is expected to become a single vdivss of %xmm1 and %xmm0.
1875define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
1876; CHECK-LABEL: test_x86_sse_div_ss:
1877; CHECK:       # BB#0:
1878; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
1879; CHECK-NEXT:    retl
1880  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1881  ret <4 x float> %res
1882}
1883declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
1884
1885
; Codegen test for @llvm.x86.sse.ldmxcsr. Expected output: the i8* argument
; is read from the stack into %eax and used as the memory operand of
; vldmxcsr. The intrinsic is declared without readnone, unlike the pure
; arithmetic intrinsics in this file.
1886define void @test_x86_sse_ldmxcsr(i8* %a0) {
1887; CHECK-LABEL: test_x86_sse_ldmxcsr:
1888; CHECK:       # BB#0:
1889; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
1890; CHECK-NEXT:    vldmxcsr (%eax)
1891; CHECK-NEXT:    retl
1892  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
1893  ret void
1894}
1895declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
1896
1897
1898
; Codegen test for @llvm.x86.sse.max.ps: expected to become a single vmaxps
; of %xmm1 and %xmm0.
1899define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
1900; CHECK-LABEL: test_x86_sse_max_ps:
1901; CHECK:       # BB#0:
1902; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
1903; CHECK-NEXT:    retl
1904  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1905  ret <4 x float> %res
1906}
1907declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
1908
1909
; Codegen test for @llvm.x86.sse.max.ss: expected to become a single vmaxss
; of %xmm1 and %xmm0.
1910define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
1911; CHECK-LABEL: test_x86_sse_max_ss:
1912; CHECK:       # BB#0:
1913; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
1914; CHECK-NEXT:    retl
1915  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1916  ret <4 x float> %res
1917}
1918declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
1919
1920
; Codegen test for @llvm.x86.sse.min.ps: expected to become a single vminps
; of %xmm1 and %xmm0.
1921define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
1922; CHECK-LABEL: test_x86_sse_min_ps:
1923; CHECK:       # BB#0:
1924; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0
1925; CHECK-NEXT:    retl
1926  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1927  ret <4 x float> %res
1928}
1929declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
1930
1931
; Codegen test for @llvm.x86.sse.min.ss: expected to become a single vminss
; of %xmm1 and %xmm0.
1932define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
1933; CHECK-LABEL: test_x86_sse_min_ss:
1934; CHECK:       # BB#0:
1935; CHECK-NEXT:    vminss %xmm1, %xmm0, %xmm0
1936; CHECK-NEXT:    retl
1937  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1938  ret <4 x float> %res
1939}
1940declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
1941
1942
; Codegen test for @llvm.x86.sse.movmsk.ps. Expected output: a single
; vmovmskps from %xmm0 into %eax.
1943define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
1944; CHECK-LABEL: test_x86_sse_movmsk_ps:
1945; CHECK:       # BB#0:
1946; CHECK-NEXT:    vmovmskps %xmm0, %eax
1947; CHECK-NEXT:    retl
1948  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
1949  ret i32 %res
1950}
1951declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
1952
1953
1954
; Codegen test for @llvm.x86.sse.mul.ss: expected to become a single vmulss
; of %xmm1 and %xmm0.
1955define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
1956; CHECK-LABEL: test_x86_sse_mul_ss:
1957; CHECK:       # BB#0:
1958; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
1959; CHECK-NEXT:    retl
1960  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
1961  ret <4 x float> %res
1962}
1963declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
1964
1965
; Codegen test for @llvm.x86.sse.rcp.ps: expected to become a single
; two-operand vrcpps on %xmm0.
1966define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
1967; CHECK-LABEL: test_x86_sse_rcp_ps:
1968; CHECK:       # BB#0:
1969; CHECK-NEXT:    vrcpps %xmm0, %xmm0
1970; CHECK-NEXT:    retl
1971  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1972  ret <4 x float> %res
1973}
1974declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
1975
1976
; Codegen test for @llvm.x86.sse.rcp.ss: the single-argument intrinsic is
; expected to become a three-operand vrcpss with %xmm0 in every operand slot.
1977define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
1978; CHECK-LABEL: test_x86_sse_rcp_ss:
1979; CHECK:       # BB#0:
1980; CHECK-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
1981; CHECK-NEXT:    retl
1982  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1983  ret <4 x float> %res
1984}
1985declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
1986
1987
; Codegen test for @llvm.x86.sse.rsqrt.ps: expected to become a single
; two-operand vrsqrtps on %xmm0.
1988define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
1989; CHECK-LABEL: test_x86_sse_rsqrt_ps:
1990; CHECK:       # BB#0:
1991; CHECK-NEXT:    vrsqrtps %xmm0, %xmm0
1992; CHECK-NEXT:    retl
1993  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
1994  ret <4 x float> %res
1995}
1996declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
1997
1998
; Codegen test for @llvm.x86.sse.rsqrt.ss: the single-argument intrinsic is
; expected to become a three-operand vrsqrtss with %xmm0 in every operand
; slot.
1999define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
2000; CHECK-LABEL: test_x86_sse_rsqrt_ss:
2001; CHECK:       # BB#0:
2002; CHECK-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
2003; CHECK-NEXT:    retl
2004  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2005  ret <4 x float> %res
2006}
2007declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
2008
2009
; Codegen test for @llvm.x86.sse.sqrt.ps: expected to become a single
; two-operand vsqrtps on %xmm0.
2010define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
2011; CHECK-LABEL: test_x86_sse_sqrt_ps:
2012; CHECK:       # BB#0:
2013; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
2014; CHECK-NEXT:    retl
2015  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2016  ret <4 x float> %res
2017}
2018declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
2019
2020
; Codegen test for @llvm.x86.sse.sqrt.ss: the single-argument intrinsic is
; expected to become a three-operand vsqrtss with %xmm0 in every operand
; slot.
2021define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
2022; CHECK-LABEL: test_x86_sse_sqrt_ss:
2023; CHECK:       # BB#0:
2024; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
2025; CHECK-NEXT:    retl
2026  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
2027  ret <4 x float> %res
2028}
2029declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
2030
2031
; Codegen test for @llvm.x86.sse.stmxcsr. Expected output: the i8* argument
; is read from the stack into %eax and used as the memory operand of
; vstmxcsr. The intrinsic is declared without readnone.
2032define void @test_x86_sse_stmxcsr(i8* %a0) {
2033; CHECK-LABEL: test_x86_sse_stmxcsr:
2034; CHECK:       # BB#0:
2035; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2036; CHECK-NEXT:    vstmxcsr (%eax)
2037; CHECK-NEXT:    retl
2038  call void @llvm.x86.sse.stmxcsr(i8* %a0)
2039  ret void
2040}
2041declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
2042
2043
; Codegen test for @llvm.x86.sse.storeu.ps. Expected output: the destination
; pointer is read from the stack into %eax and %xmm0 is written through it
; with vmovups.
2044define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
2045; CHECK-LABEL: test_x86_sse_storeu_ps:
2046; CHECK:       # BB#0:
2047; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2048; CHECK-NEXT:    vmovups %xmm0, (%eax)
2049; CHECK-NEXT:    retl
2050  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
2051  ret void
2052}
2053declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
2054
2055
; Codegen test for @llvm.x86.sse.sub.ss: expected to become a single vsubss
; of %xmm1 and %xmm0.
2056define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
2057; CHECK-LABEL: test_x86_sse_sub_ss:
2058; CHECK:       # BB#0:
2059; CHECK-NEXT:    vsubss %xmm1, %xmm0, %xmm0
2060; CHECK-NEXT:    retl
2061  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2062  ret <4 x float> %res
2063}
2064declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
2065
2066
; Codegen test for @llvm.x86.sse.ucomieq.ss. Expected output: vucomiss on
; %xmm1/%xmm0, then sete + movzbl to build the i32 return value.
2067define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
2068; CHECK-LABEL: test_x86_sse_ucomieq_ss:
2069; CHECK:       # BB#0:
2070; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2071; CHECK-NEXT:    sete %al
2072; CHECK-NEXT:    movzbl %al, %eax
2073; CHECK-NEXT:    retl
2074  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2075  ret i32 %res
2076}
2077declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
2078
2079
; Codegen test for @llvm.x86.sse.ucomige.ss. Expected output: vucomiss on
; %xmm1/%xmm0, then setae + movzbl to build the i32 return value.
2080define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
2081; CHECK-LABEL: test_x86_sse_ucomige_ss:
2082; CHECK:       # BB#0:
2083; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2084; CHECK-NEXT:    setae %al
2085; CHECK-NEXT:    movzbl %al, %eax
2086; CHECK-NEXT:    retl
2087  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2088  ret i32 %res
2089}
2090declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
2091
2092
; Codegen test for @llvm.x86.sse.ucomigt.ss. Expected output: vucomiss on
; %xmm1/%xmm0, then seta + movzbl to build the i32 return value.
2093define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
2094; CHECK-LABEL: test_x86_sse_ucomigt_ss:
2095; CHECK:       # BB#0:
2096; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2097; CHECK-NEXT:    seta %al
2098; CHECK-NEXT:    movzbl %al, %eax
2099; CHECK-NEXT:    retl
2100  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2101  ret i32 %res
2102}
2103declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
2104
2105
; Codegen test for @llvm.x86.sse.ucomile.ss. Expected output: vucomiss on
; %xmm1/%xmm0, then setbe + movzbl to build the i32 return value.
2106define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
2107; CHECK-LABEL: test_x86_sse_ucomile_ss:
2108; CHECK:       # BB#0:
2109; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2110; CHECK-NEXT:    setbe %al
2111; CHECK-NEXT:    movzbl %al, %eax
2112; CHECK-NEXT:    retl
2113  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2114  ret i32 %res
2115}
2116declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
2117
2118
; Codegen test for @llvm.x86.sse.ucomilt.ss. Expected output: vucomiss on
; %xmm1/%xmm0, then `sbbl %eax, %eax` and `andl $1, %eax` to build the i32
; return value (instead of a setcc/movzbl pair).
2119define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
2120; CHECK-LABEL: test_x86_sse_ucomilt_ss:
2121; CHECK:       # BB#0:
2122; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2123; CHECK-NEXT:    sbbl %eax, %eax
2124; CHECK-NEXT:    andl $1, %eax
2125; CHECK-NEXT:    retl
2126  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2127  ret i32 %res
2128}
2129declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
2130
2131
; Codegen test for @llvm.x86.sse.ucomineq.ss. Expected output: vucomiss on
; %xmm1/%xmm0, then setne + movzbl to build the i32 return value.
2132define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
2133; CHECK-LABEL: test_x86_sse_ucomineq_ss:
2134; CHECK:       # BB#0:
2135; CHECK-NEXT:    vucomiss %xmm1, %xmm0
2136; CHECK-NEXT:    setne %al
2137; CHECK-NEXT:    movzbl %al, %eax
2138; CHECK-NEXT:    retl
2139  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
2140  ret i32 %res
2141}
2142declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
2143
2144
; Codegen test for @llvm.x86.ssse3.pabs.b.128 on <16 x i8>: expected to
; become a single vpabsb on %xmm0.
2145define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
2146; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
2147; CHECK:       # BB#0:
2148; CHECK-NEXT:    vpabsb %xmm0, %xmm0
2149; CHECK-NEXT:    retl
2150  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
2151  ret <16 x i8> %res
2152}
2153declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
2154
2155
; Codegen test for @llvm.x86.ssse3.pabs.d.128 on <4 x i32>: expected to
; become a single vpabsd on %xmm0.
2156define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
2157; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
2158; CHECK:       # BB#0:
2159; CHECK-NEXT:    vpabsd %xmm0, %xmm0
2160; CHECK-NEXT:    retl
2161  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
2162  ret <4 x i32> %res
2163}
2164declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
2165
2166
; Codegen test for @llvm.x86.ssse3.pabs.w.128 on <8 x i16>: expected to
; become a single vpabsw on %xmm0.
2167define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
2168; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
2169; CHECK:       # BB#0:
2170; CHECK-NEXT:    vpabsw %xmm0, %xmm0
2171; CHECK-NEXT:    retl
2172  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
2173  ret <8 x i16> %res
2174}
2175declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
2176
2177
; Codegen test for @llvm.x86.ssse3.phadd.d.128 on <4 x i32>: expected to
; become a single vphaddd of %xmm1 and %xmm0.
2178define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2179; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
2180; CHECK:       # BB#0:
2181; CHECK-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
2182; CHECK-NEXT:    retl
2183  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2184  ret <4 x i32> %res
2185}
2186declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2187
2188
; Codegen test for @llvm.x86.ssse3.phadd.sw.128 on <8 x i16>: expected to
; become a single vphaddsw of %xmm1 and %xmm0.
2189define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2190; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
2191; CHECK:       # BB#0:
2192; CHECK-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
2193; CHECK-NEXT:    retl
2194  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2195  ret <8 x i16> %res
2196}
2197declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
2198
2199
; Codegen test for @llvm.x86.ssse3.phadd.w.128 on <8 x i16>: expected to
; become a single vphaddw of %xmm1 and %xmm0.
2200define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2201; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
2202; CHECK:       # BB#0:
2203; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
2204; CHECK-NEXT:    retl
2205  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2206  ret <8 x i16> %res
2207}
2208declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
2209
2210
; Codegen test for @llvm.x86.ssse3.phsub.d.128 on <4 x i32>: expected to
; become a single vphsubd of %xmm1 and %xmm0.
2211define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2212; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
2213; CHECK:       # BB#0:
2214; CHECK-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
2215; CHECK-NEXT:    retl
2216  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2217  ret <4 x i32> %res
2218}
2219declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2220
2221
; Codegen test for @llvm.x86.ssse3.phsub.sw.128 on <8 x i16>: expected to
; become a single vphsubsw of %xmm1 and %xmm0.
2222define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2223; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
2224; CHECK:       # BB#0:
2225; CHECK-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
2226; CHECK-NEXT:    retl
2227  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2228  ret <8 x i16> %res
2229}
2230declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
2231
2232
; Codegen test for @llvm.x86.ssse3.phsub.w.128 on <8 x i16>: expected to
; become a single vphsubw of %xmm1 and %xmm0.
2233define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2234; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
2235; CHECK:       # BB#0:
2236; CHECK-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
2237; CHECK-NEXT:    retl
2238  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2239  ret <8 x i16> %res
2240}
2241declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
2242
2243
; Codegen test for @llvm.x86.ssse3.pmadd.ub.sw.128, which takes two
; <16 x i8> vectors and returns <8 x i16>: expected to become a single
; vpmaddubsw of %xmm1 and %xmm0.
2244define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
2245; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
2246; CHECK:       # BB#0:
2247; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
2248; CHECK-NEXT:    retl
2249  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
2250  ret <8 x i16> %res
2251}
2252declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
2253
2254
; Codegen test for @llvm.x86.ssse3.pmul.hr.sw.128 on <8 x i16>: expected to
; become a single vpmulhrsw of %xmm1 and %xmm0.
2255define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
2256; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
2257; CHECK:       # BB#0:
2258; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
2259; CHECK-NEXT:    retl
2260  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2261  ret <8 x i16> %res
2262}
2263declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
2264
2265
; Codegen test for @llvm.x86.ssse3.pshuf.b.128 on <16 x i8>: expected to
; become a single vpshufb of %xmm1 and %xmm0.
2266define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2267; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
2268; CHECK:       # BB#0:
2269; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
2270; CHECK-NEXT:    retl
2271  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2272  ret <16 x i8> %res
2273}
2274declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
2275
2276
; Codegen test for @llvm.x86.ssse3.psign.b.128 on <16 x i8>: expected to
; become a single vpsignb of %xmm1 and %xmm0.
2277define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
2278; CHECK-LABEL: test_x86_ssse3_psign_b_128:
2279; CHECK:       # BB#0:
2280; CHECK-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
2281; CHECK-NEXT:    retl
2282  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
2283  ret <16 x i8> %res
2284}
2285declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
2286
2287
; Codegen test for @llvm.x86.ssse3.psign.d.128 on <4 x i32>: expected to
; become a single vpsignd of %xmm1 and %xmm0.
2288define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
2289; CHECK-LABEL: test_x86_ssse3_psign_d_128:
2290; CHECK:       # BB#0:
2291; CHECK-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
2292; CHECK-NEXT:    retl
2293  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
2294  ret <4 x i32> %res
2295}
2296declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
2297
2298
; Codegen test for @llvm.x86.ssse3.psign.w.128 on <8 x i16>: expected to
; become a single vpsignw of %xmm1 and %xmm0.
2299define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
2300; CHECK-LABEL: test_x86_ssse3_psign_w_128:
2301; CHECK:       # BB#0:
2302; CHECK-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
2303; CHECK-NEXT:    retl
2304  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
2305  ret <8 x i16> %res
2306}
2307declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
2308
2309
; Codegen test for the 256-bit @llvm.x86.avx.addsub.pd.256 on <4 x double>:
; expected to become a single vaddsubpd on %ymm registers.
2310define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2311; CHECK-LABEL: test_x86_avx_addsub_pd_256:
2312; CHECK:       # BB#0:
2313; CHECK-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
2314; CHECK-NEXT:    retl
2315  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2316  ret <4 x double> %res
2317}
2318declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
2319
2320
; Codegen test for the 256-bit @llvm.x86.avx.addsub.ps.256 on <8 x float>:
; expected to become a single vaddsubps on %ymm registers.
2321define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2322; CHECK-LABEL: test_x86_avx_addsub_ps_256:
2323; CHECK:       # BB#0:
2324; CHECK-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
2325; CHECK-NEXT:    retl
2326  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2327  ret <8 x float> %res
2328}
2329declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
2330
2331
; Codegen test for the three-argument @llvm.x86.avx.blendv.pd.256 on
; <4 x double>: expected to become a single vblendvpd that reads %ymm2,
; %ymm1 and %ymm0 and writes %ymm0.
2332define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
2333; CHECK-LABEL: test_x86_avx_blendv_pd_256:
2334; CHECK:       # BB#0:
2335; CHECK-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
2336; CHECK-NEXT:    retl
2337  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
2338  ret <4 x double> %res
2339}
2340declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
2341
2342
; Codegen test for the three-argument @llvm.x86.avx.blendv.ps.256 on
; <8 x float>: expected to become a single vblendvps that reads %ymm2,
; %ymm1 and %ymm0 and writes %ymm0.
2343define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
2344; CHECK-LABEL: test_x86_avx_blendv_ps_256:
2345; CHECK:       # BB#0:
2346; CHECK-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
2347; CHECK-NEXT:    retl
2348  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
2349  ret <8 x float> %res
2350}
2351declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
2352
2353
; Codegen test for the 256-bit @llvm.x86.avx.cmp.pd.256 with predicate
; immediate 7. The expected output prints that predicate with the vcmpordpd
; mnemonic on %ymm registers.
2354define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
2355; CHECK-LABEL: test_x86_avx_cmp_pd_256:
2356; CHECK:       # BB#0:
2357; CHECK-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
2358; CHECK-NEXT:    retl
2359  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2360  ret <4 x double> %res
2361}
2362declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2363
2364
; Codegen test for the 256-bit @llvm.x86.avx.cmp.ps.256 with predicate
; immediate 7. The expected output prints that predicate with the vcmpordps
; mnemonic on %ymm registers. The intrinsic's declaration does not directly
; follow this function; the next test calls the same intrinsic.
2365define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2366; CHECK-LABEL: test_x86_avx_cmp_ps_256:
2367; CHECK:       # BB#0:
2368; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
2369; CHECK-NEXT:    retl
2370  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2371  ret <8 x float> %res
2372}
2373
; Pseudo-op printing test: issues 32 chained calls to @llvm.x86.avx.cmp.ps.256
; with predicate immediates 0 through 31. Each call takes %a0 as its first
; operand and the previous call's result as its second, so none of the
; compares is dead. The expected output lists one named vcmp<pred>ps mnemonic
; per immediate, in immediate order (vcmpeqps for 0 ... vcmptrue_usps for 31);
; only the final compare writes %ymm0, the intermediate ones write %ymm1.
2374define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
2375; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
2376; CHECK:       # BB#0:
2377; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
2378; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
2379; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
2380; CHECK-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
2381; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
2382; CHECK-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
2383; CHECK-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
2384; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
2385; CHECK-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
2386; CHECK-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
2387; CHECK-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
2388; CHECK-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
2389; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
2390; CHECK-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
2391; CHECK-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
2392; CHECK-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
2393; CHECK-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
2394; CHECK-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
2395; CHECK-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
2396; CHECK-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
2397; CHECK-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
2398; CHECK-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
2399; CHECK-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
2400; CHECK-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
2401; CHECK-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
2402; CHECK-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
2403; CHECK-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
2404; CHECK-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
2405; CHECK-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
2406; CHECK-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
2407; CHECK-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
2408; CHECK-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
2409; CHECK-NEXT:    retl
2410  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
2411  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
2412  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
2413  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
2414  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
2415  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
2416  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
2417  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
2418  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
2419  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
2420  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
2421  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
2422  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
2423  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
2424  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
2425  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
2426  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
2427  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
2428  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
2429  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
2430  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
2431  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
2432  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
2433  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
2434  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
2435  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
2436  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
2437  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
2438  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
2439  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
2440  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
2441  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
2442  ret <8 x float> %res
2443}
2444declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2445
2446
; Lowering test: @llvm.x86.avx.cvt.pd2.ps.256 (<4 x double> to <4 x float>) is
; expected to become vcvtpd2psy from ymm0 into xmm0, followed by vzeroupper
; before the return.
2447define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
2448; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
2449; CHECK:       # BB#0:
2450; CHECK-NEXT:    vcvtpd2psy %ymm0, %xmm0
2451; CHECK-NEXT:    vzeroupper
2452; CHECK-NEXT:    retl
2453  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
2454  ret <4 x float> %res
2455}
2456declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
2457
2458
; Lowering test: @llvm.x86.avx.cvt.pd2dq.256 (<4 x double> to <4 x i32>) is
; expected to become vcvtpd2dqy from ymm0 into xmm0, followed by vzeroupper
; before the return.
2459define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
2460; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
2461; CHECK:       # BB#0:
2462; CHECK-NEXT:    vcvtpd2dqy %ymm0, %xmm0
2463; CHECK-NEXT:    vzeroupper
2464; CHECK-NEXT:    retl
2465  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2466  ret <4 x i32> %res
2467}
2468declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
2469
2470
; Lowering test: @llvm.x86.avx.cvt.ps2.pd.256 (<4 x float> to <4 x double>) is
; expected to become vcvtps2pd from xmm0 into ymm0.
2471define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
2472; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
2473; CHECK:       # BB#0:
2474; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
2475; CHECK-NEXT:    retl
2476  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
2477  ret <4 x double> %res
2478}
2479declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
2480
2481
; Lowering test: @llvm.x86.avx.cvt.ps2dq.256 (<8 x float> to <8 x i32>) is
; expected to become vcvtps2dq on ymm0.
2482define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
2483; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
2484; CHECK:       # BB#0:
2485; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0
2486; CHECK-NEXT:    retl
2487  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2488  ret <8 x i32> %res
2489}
2490declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
2491
2492
; Lowering test: @llvm.x86.avx.cvtdq2.pd.256 (<4 x i32> to <4 x double>) is
; expected to become vcvtdq2pd from xmm0 into ymm0.
2493define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
2494; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
2495; CHECK:       # BB#0:
2496; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
2497; CHECK-NEXT:    retl
2498  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
2499  ret <4 x double> %res
2500}
2501declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
2502
2503
; Lowering test: @llvm.x86.avx.cvtdq2.ps.256 (<8 x i32> to <8 x float>) is
; expected to become vcvtdq2ps on ymm0.
2504define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
2505; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
2506; CHECK:       # BB#0:
2507; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
2508; CHECK-NEXT:    retl
2509  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
2510  ret <8 x float> %res
2511}
2512declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
2513
2514
; Lowering test: @llvm.x86.avx.cvtt.pd2dq.256 (<4 x double> to <4 x i32>) is
; expected to become vcvttpd2dqy from ymm0 into xmm0, followed by vzeroupper
; before the return.
2515define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
2516; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
2517; CHECK:       # BB#0:
2518; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
2519; CHECK-NEXT:    vzeroupper
2520; CHECK-NEXT:    retl
2521  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
2522  ret <4 x i32> %res
2523}
2524declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
2525
2526
; Lowering test: @llvm.x86.avx.cvtt.ps2dq.256 (<8 x float> to <8 x i32>) is
; expected to become vcvttps2dq on ymm0.
2527define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
2528; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
2529; CHECK:       # BB#0:
2530; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
2531; CHECK-NEXT:    retl
2532  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
2533  ret <8 x i32> %res
2534}
2535declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
2536
2537
; Lowering test: @llvm.x86.avx.dp.ps.256 with immediate 7 is expected to
; become vdpps with the same immediate ($7) on ymm registers.
2538define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
2539; CHECK-LABEL: test_x86_avx_dp_ps_256:
2540; CHECK:       # BB#0:
2541; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
2542; CHECK-NEXT:    retl
2543  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2544  ret <8 x float> %res
2545}
2546declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2547
2548
; Lowering test: @llvm.x86.avx.hadd.pd.256 on two <4 x double> operands is
; expected to become a single vhaddpd on ymm registers.
2549define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
2550; CHECK-LABEL: test_x86_avx_hadd_pd_256:
2551; CHECK:       # BB#0:
2552; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
2553; CHECK-NEXT:    retl
2554  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2555  ret <4 x double> %res
2556}
2557declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
2558
2559
; Lowering test: @llvm.x86.avx.hadd.ps.256 on two <8 x float> operands is
; expected to become a single vhaddps on ymm registers.
2560define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
2561; CHECK-LABEL: test_x86_avx_hadd_ps_256:
2562; CHECK:       # BB#0:
2563; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
2564; CHECK-NEXT:    retl
2565  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2566  ret <8 x float> %res
2567}
2568declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
2569
2570
; Lowering test: @llvm.x86.avx.hsub.pd.256 on two <4 x double> operands is
; expected to become a single vhsubpd on ymm registers.
2571define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
2572; CHECK-LABEL: test_x86_avx_hsub_pd_256:
2573; CHECK:       # BB#0:
2574; CHECK-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
2575; CHECK-NEXT:    retl
2576  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2577  ret <4 x double> %res
2578}
2579declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
2580
2581
; Lowering test: @llvm.x86.avx.hsub.ps.256 on two <8 x float> operands is
; expected to become a single vhsubps on ymm registers.
2582define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2583; CHECK-LABEL: test_x86_avx_hsub_ps_256:
2584; CHECK:       # BB#0:
2585; CHECK-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
2586; CHECK-NEXT:    retl
2587  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2588  ret <8 x float> %res
2589}
2590declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
2591
2592
; Lowering test: @llvm.x86.avx.ldu.dq.256 is expected to become vlddqu into
; ymm0, with the pointer argument first loaded from the stack into %eax.
2593define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
2594; CHECK-LABEL: test_x86_avx_ldu_dq_256:
2595; CHECK:       # BB#0:
2596; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2597; CHECK-NEXT:    vlddqu (%eax), %ymm0
2598; CHECK-NEXT:    retl
2599  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
2600  ret <32 x i8> %res
2601}
2602declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
2603
2604
; Lowering test: @llvm.x86.avx.maskload.pd (128-bit form) is expected to become
; vmaskmovpd with a memory source, using xmm0 (%a1) as the mask register and
; the pointer loaded from the stack into %eax.
2605define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
2606; CHECK-LABEL: test_x86_avx_maskload_pd:
2607; CHECK:       # BB#0:
2608; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2609; CHECK-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
2610; CHECK-NEXT:    retl
2611  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
2612  ret <2 x double> %res
2613}
2614declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
2615
2616
; Lowering test: @llvm.x86.avx.maskload.pd.256 is expected to become
; vmaskmovpd with a memory source and ymm0 (%a1) as the mask register; the
; pointer is loaded from the stack into %eax first.
2617define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
2618; CHECK-LABEL: test_x86_avx_maskload_pd_256:
2619; CHECK:       # BB#0:
2620; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2621; CHECK-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
2622; CHECK-NEXT:    retl
2623  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2624  ret <4 x double> %res
2625}
2626declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
2627
2628
; Lowering test: @llvm.x86.avx.maskload.ps (128-bit form) is expected to become
; vmaskmovps with a memory source and xmm0 (%a1) as the mask register; the
; pointer is loaded from the stack into %eax first.
2629define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
2630; CHECK-LABEL: test_x86_avx_maskload_ps:
2631; CHECK:       # BB#0:
2632; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2633; CHECK-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
2634; CHECK-NEXT:    retl
2635  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
2636  ret <4 x float> %res
2637}
2638declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
2639
2640
; Lowering test: @llvm.x86.avx.maskload.ps.256 is expected to become
; vmaskmovps with a memory source and ymm0 (%a1) as the mask register; the
; pointer is loaded from the stack into %eax first.
2641define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
2642; CHECK-LABEL: test_x86_avx_maskload_ps_256:
2643; CHECK:       # BB#0:
2644; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2645; CHECK-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
2646; CHECK-NEXT:    retl
2647  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2648  ret <8 x float> %res
2649}
2650declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
2651
2652
; Lowering test: @llvm.x86.avx.maskstore.pd (128-bit form) is expected to
; become vmaskmovpd with a memory destination, taking %a1 in xmm0 and %a2 in
; xmm1; the pointer is loaded from the stack into %eax first.
2653define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
2654; CHECK-LABEL: test_x86_avx_maskstore_pd:
2655; CHECK:       # BB#0:
2656; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2657; CHECK-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
2658; CHECK-NEXT:    retl
2659  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
2660  ret void
2661}
2662declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
2663
2664
; Lowering test: @llvm.x86.avx.maskstore.pd.256 is expected to become
; vmaskmovpd with a memory destination on ymm registers, followed by
; vzeroupper before the return.
2665define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
2666; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
2667; CHECK:       # BB#0:
2668; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2669; CHECK-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
2670; CHECK-NEXT:    vzeroupper
2671; CHECK-NEXT:    retl
2672  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
2673  ret void
2674}
2675declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
2676
2677
; Lowering test: @llvm.x86.avx.maskstore.ps (128-bit form) is expected to
; become vmaskmovps with a memory destination, taking %a1 in xmm0 and %a2 in
; xmm1; the pointer is loaded from the stack into %eax first.
2678define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
2679; CHECK-LABEL: test_x86_avx_maskstore_ps:
2680; CHECK:       # BB#0:
2681; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2682; CHECK-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
2683; CHECK-NEXT:    retl
2684  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
2685  ret void
2686}
2687declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
2688
2689
; Lowering test: @llvm.x86.avx.maskstore.ps.256 is expected to become
; vmaskmovps with a memory destination on ymm registers, followed by
; vzeroupper before the return.
2690define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
2691; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
2692; CHECK:       # BB#0:
2693; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2694; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
2695; CHECK-NEXT:    vzeroupper
2696; CHECK-NEXT:    retl
2697  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
2698  ret void
2699}
2700declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
2701
2702
; Lowering test: @llvm.x86.avx.max.pd.256 on two <4 x double> operands is
; expected to become a single vmaxpd on ymm registers.
2703define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
2704; CHECK-LABEL: test_x86_avx_max_pd_256:
2705; CHECK:       # BB#0:
2706; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
2707; CHECK-NEXT:    retl
2708  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2709  ret <4 x double> %res
2710}
2711declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
2712
2713
; Lowering test: @llvm.x86.avx.max.ps.256 on two <8 x float> operands is
; expected to become a single vmaxps on ymm registers.
2714define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
2715; CHECK-LABEL: test_x86_avx_max_ps_256:
2716; CHECK:       # BB#0:
2717; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
2718; CHECK-NEXT:    retl
2719  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2720  ret <8 x float> %res
2721}
2722declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
2723
2724
; Lowering test: @llvm.x86.avx.min.pd.256 on two <4 x double> operands is
; expected to become a single vminpd on ymm registers.
2725define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
2726; CHECK-LABEL: test_x86_avx_min_pd_256:
2727; CHECK:       # BB#0:
2728; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
2729; CHECK-NEXT:    retl
2730  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
2731  ret <4 x double> %res
2732}
2733declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
2734
2735
; Lowering test: @llvm.x86.avx.min.ps.256 on two <8 x float> operands is
; expected to become a single vminps on ymm registers.
2736define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
2737; CHECK-LABEL: test_x86_avx_min_ps_256:
2738; CHECK:       # BB#0:
2739; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0
2740; CHECK-NEXT:    retl
2741  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
2742  ret <8 x float> %res
2743}
2744declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
2745
2746
; Lowering test: @llvm.x86.avx.movmsk.pd.256 is expected to become vmovmskpd
; from ymm0 into %eax, followed by vzeroupper before the return.
2747define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
2748; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
2749; CHECK:       # BB#0:
2750; CHECK-NEXT:    vmovmskpd %ymm0, %eax
2751; CHECK-NEXT:    vzeroupper
2752; CHECK-NEXT:    retl
2753  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
2754  ret i32 %res
2755}
2756declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
2757
2758
; Lowering test: @llvm.x86.avx.movmsk.ps.256 is expected to become vmovmskps
; from ymm0 into %eax, followed by vzeroupper before the return.
2759define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
2760; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
2761; CHECK:       # BB#0:
2762; CHECK-NEXT:    vmovmskps %ymm0, %eax
2763; CHECK-NEXT:    vzeroupper
2764; CHECK-NEXT:    retl
2765  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
2766  ret i32 %res
2767}
2768declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
2769
2770
2771
2772
2773
2774
2775
; Lowering test: @llvm.x86.avx.ptestc.256 is expected to become vptest on ymm
; registers, with the i32 result built in %eax by sbbl followed by andl $1;
; vzeroupper precedes the return.
2776define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
2777; CHECK-LABEL: test_x86_avx_ptestc_256:
2778; CHECK:       # BB#0:
2779; CHECK-NEXT:    vptest %ymm1, %ymm0
2780; CHECK-NEXT:    sbbl %eax, %eax
2781; CHECK-NEXT:    andl $1, %eax
2782; CHECK-NEXT:    vzeroupper
2783; CHECK-NEXT:    retl
2784  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2785  ret i32 %res
2786}
2787declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
2788
2789
; Lowering test: @llvm.x86.avx.ptestnzc.256 is expected to become vptest on
; ymm registers, with the i32 result built by seta %al and movzbl into %eax;
; vzeroupper precedes the return.
2790define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
2791; CHECK-LABEL: test_x86_avx_ptestnzc_256:
2792; CHECK:       # BB#0:
2793; CHECK-NEXT:    vptest %ymm1, %ymm0
2794; CHECK-NEXT:    seta %al
2795; CHECK-NEXT:    movzbl %al, %eax
2796; CHECK-NEXT:    vzeroupper
2797; CHECK-NEXT:    retl
2798  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2799  ret i32 %res
2800}
2801declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
2802
2803
; Lowering test: @llvm.x86.avx.ptestz.256 is expected to become vptest on ymm
; registers, with the i32 result built by sete %al and movzbl into %eax;
; vzeroupper precedes the return.
2804define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
2805; CHECK-LABEL: test_x86_avx_ptestz_256:
2806; CHECK:       # BB#0:
2807; CHECK-NEXT:    vptest %ymm1, %ymm0
2808; CHECK-NEXT:    sete %al
2809; CHECK-NEXT:    movzbl %al, %eax
2810; CHECK-NEXT:    vzeroupper
2811; CHECK-NEXT:    retl
2812  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
2813  ret i32 %res
2814}
2815declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
2816
2817
; Lowering test: @llvm.x86.avx.rcp.ps.256 is expected to become a single
; vrcpps on ymm0.
2818define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
2819; CHECK-LABEL: test_x86_avx_rcp_ps_256:
2820; CHECK:       # BB#0:
2821; CHECK-NEXT:    vrcpps %ymm0, %ymm0
2822; CHECK-NEXT:    retl
2823  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2824  ret <8 x float> %res
2825}
2826declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
2827
2828
; Lowering test: @llvm.x86.avx.round.pd.256 with immediate 7 is expected to
; become vroundpd with the same immediate ($7) on ymm0.
2829define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
2830; CHECK-LABEL: test_x86_avx_round_pd_256:
2831; CHECK:       # BB#0:
2832; CHECK-NEXT:    vroundpd $7, %ymm0, %ymm0
2833; CHECK-NEXT:    retl
2834  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
2835  ret <4 x double> %res
2836}
2837declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
2838
2839
; Lowering test: @llvm.x86.avx.round.ps.256 with immediate 7 is expected to
; become vroundps with the same immediate ($7) on ymm0.
2840define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
2841; CHECK-LABEL: test_x86_avx_round_ps_256:
2842; CHECK:       # BB#0:
2843; CHECK-NEXT:    vroundps $7, %ymm0, %ymm0
2844; CHECK-NEXT:    retl
2845  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
2846  ret <8 x float> %res
2847}
2848declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
2849
2850
; Lowering test: @llvm.x86.avx.rsqrt.ps.256 is expected to become a single
; vrsqrtps on ymm0.
2851define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
2852; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
2853; CHECK:       # BB#0:
2854; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0
2855; CHECK-NEXT:    retl
2856  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2857  ret <8 x float> %res
2858}
2859declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
2860
2861
; Lowering test: @llvm.x86.avx.sqrt.pd.256 is expected to become a single
; vsqrtpd on ymm0.
2862define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
2863; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
2864; CHECK:       # BB#0:
2865; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0
2866; CHECK-NEXT:    retl
2867  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
2868  ret <4 x double> %res
2869}
2870declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
2871
2872
; Lowering test: @llvm.x86.avx.sqrt.ps.256 is expected to become a single
; vsqrtps on ymm0.
2873define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
2874; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
2875; CHECK:       # BB#0:
2876; CHECK-NEXT:    vsqrtps %ymm0, %ymm0
2877; CHECK-NEXT:    retl
2878  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
2879  ret <8 x float> %res
2880}
2881declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
2882
2883
; Lowering test: @llvm.x86.avx.storeu.dq.256 of a <32 x i8> value that has
; first been incremented by a splat of 1. The expected output performs the
; byte add as two 128-bit vpaddb halves (vextractf128 / vinsertf128) and
; stores the recombined ymm0 with vmovups.
2884define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
2885  ; FIXME: unfortunately the execution domain fix pass changes this store to vmovups, and it is hard to force the integer domain with no 256-bit integer instructions available.
2886  ; The add operation is meant to force the execution domain; the expected output below still ends in vmovups.
2887; CHECK-LABEL: test_x86_avx_storeu_dq_256:
2888; CHECK:       # BB#0:
2889; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2890; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
2891; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2892; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
2893; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
2894; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
2895; CHECK-NEXT:    vmovups %ymm0, (%eax)
2896; CHECK-NEXT:    vzeroupper
2897; CHECK-NEXT:    retl
2898  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2899  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
2900  ret void
2901}
2902declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
2903
2904
; Lowering test: @llvm.x86.avx.storeu.pd.256 of a <4 x double> value that has
; first gone through an fadd with zero. The expected output materialises the
; zero with vxorpd, adds with vaddpd, and stores with vmovupd.
2905define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
2906  ; The fadd forces the double-precision execution domain, so the store is expected to stay vmovupd.
2907; CHECK-LABEL: test_x86_avx_storeu_pd_256:
2908; CHECK:       # BB#0:
2909; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2910; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
2911; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
2912; CHECK-NEXT:    vmovupd %ymm0, (%eax)
2913; CHECK-NEXT:    vzeroupper
2914; CHECK-NEXT:    retl
2915  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
2916  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
2917  ret void
2918}
2919declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
2920
2921
; Lowering test: @llvm.x86.avx.storeu.ps.256 is expected to become vmovups of
; ymm0 to the pointer loaded into %eax, followed by vzeroupper. Unlike the
; dq/pd store tests, no arithmetic is applied to the value first.
2922define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
2923; CHECK-LABEL: test_x86_avx_storeu_ps_256:
2924; CHECK:       # BB#0:
2925; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2926; CHECK-NEXT:    vmovups %ymm0, (%eax)
2927; CHECK-NEXT:    vzeroupper
2928; CHECK-NEXT:    retl
2929  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
2930  ret void
2931}
2932declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
2933
2934
; Lowering test: @llvm.x86.avx.vbroadcastf128.pd.256 is expected to become
; vbroadcastf128 from memory into ymm0, with the pointer loaded from the
; stack into %eax first.
2935define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
2936; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
2937; CHECK:       # BB#0:
2938; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2939; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
2940; CHECK-NEXT:    retl
2941  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
2942  ret <4 x double> %res
2943}
2944declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
2945
2946
; Lowering test: @llvm.x86.avx.vbroadcastf128.ps.256 is expected to become
; vbroadcastf128 from memory into ymm0, with the pointer loaded from the
; stack into %eax first.
2947define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
2948; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
2949; CHECK:       # BB#0:
2950; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
2951; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
2952; CHECK-NEXT:    retl
2953  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
2954  ret <8 x float> %res
2955}
2956declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
2957
2958
; Lowering test: @llvm.x86.avx.vperm2f128.pd.256 with immediate 7 is expected
; to become vperm2f128 whose decoded shuffle comment reads
; ymm0 = ymm1[2,3],ymm0[0,1].
2959define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
2960; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
2961; CHECK:       # BB#0:
2962; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2963; CHECK-NEXT:    retl
2964  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
2965  ret <4 x double> %res
2966}
2967declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
2968
2969
; Lowering test: @llvm.x86.avx.vperm2f128.ps.256 with immediate 7 is expected
; to become vperm2f128 whose decoded shuffle comment reads
; ymm0 = ymm1[2,3],ymm0[0,1].
2970define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
2971; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
2972; CHECK:       # BB#0:
2973; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2974; CHECK-NEXT:    retl
2975  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
2976  ret <8 x float> %res
2977}
2978declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
2979
2980
; Lowering test: @llvm.x86.avx.vperm2f128.si.256 (<8 x i32> operands) with
; immediate 7 is expected to become vperm2f128 whose decoded shuffle comment
; reads ymm0 = ymm1[2,3],ymm0[0,1].
2981define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
2982; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
2983; CHECK:       # BB#0:
2984; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
2985; CHECK-NEXT:    retl
2986  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
2987  ret <8 x i32> %res
2988}
2989declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
2990
2991
; Lowering test: @llvm.x86.avx.vpermil.pd with immediate 1 is expected to
; become vpermilpd whose decoded shuffle comment reads xmm0 = xmm0[1,0].
2992define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
2993; CHECK-LABEL: test_x86_avx_vpermil_pd:
2994; CHECK:       # BB#0:
2995; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
2996; CHECK-NEXT:    retl
2997  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
2998  ret <2 x double> %res
2999}
3000declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
3001
3002
; Lowering test: @llvm.x86.avx.vpermil.pd.256 with immediate 7 is expected to
; become vpermilpd whose decoded shuffle comment reads ymm0 = ymm0[1,1,3,2].
3003define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
3004; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
3005; CHECK:       # BB#0:
3006; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
3007; CHECK-NEXT:    retl
3008  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
3009  ret <4 x double> %res
3010}
3011declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
3012
3013
; Lowering test: @llvm.x86.avx.vpermil.ps with immediate 7 is expected to
; become vpermilps whose decoded shuffle comment reads xmm0 = xmm0[3,1,0,0].
3014define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
3015; CHECK-LABEL: test_x86_avx_vpermil_ps:
3016; CHECK:       # BB#0:
3017; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
3018; CHECK-NEXT:    retl
3019  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
3020  ret <4 x float> %res
3021}
3022declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
3023
3024
; Lowering test: @llvm.x86.avx.vpermil.ps.256 with immediate 7 is expected to
; become vpermilps whose decoded shuffle comment reads
; ymm0 = ymm0[3,1,0,0,7,5,4,4].
3025define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
3026; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
3027; CHECK:       # BB#0:
3028; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
3029; CHECK-NEXT:    retl
3030  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
3031  ret <8 x float> %res
3032}
3033declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
3034
3035
; Lowering test: @llvm.x86.avx.vpermilvar.pd with a <2 x i64> control vector
; in a register is expected to become the register form of vpermilpd on xmm
; registers.
3036define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
3037; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
3038; CHECK:       # BB#0:
3039; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
3040; CHECK-NEXT:    retl
3041  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
3042  ret <2 x double> %res
3043}
3044declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
3045
3046
; Lowering test: @llvm.x86.avx.vpermilvar.pd.256 with a <4 x i64> control
; vector in a register is expected to become the register form of vpermilpd
; on ymm registers.
3047define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
3048; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
3049; CHECK:       # BB#0:
3050; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
3051; CHECK-NEXT:    retl
3052  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
3053  ret <4 x double> %res
3054}
3055declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
3056
3057
; Lowering test: @llvm.x86.avx.vpermilvar.ps with a <4 x i32> control vector
; in a register is expected to become the register form of vpermilps on xmm
; registers. The intrinsic's declaration follows the _load variant below,
; which shares it.
3058define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
3059; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
3060; CHECK:       # BB#0:
3061; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
3062; CHECK-NEXT:    retl
3063  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
3064  ret <4 x float> %res
3065}
; Load-folding test: when the <4 x i32> control vector of
; @llvm.x86.avx.vpermilvar.ps comes from a load, the expected output folds the
; load into vpermilps as a memory operand, (%eax), rather than loading into a
; register first.
3066define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
3067; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
3068; CHECK:       # BB#0:
3069; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3070; CHECK-NEXT:    vpermilps (%eax), %xmm0, %xmm0
3071; CHECK-NEXT:    retl
3072  %a2 = load <4 x i32>, <4 x i32>* %a1
3073  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
3074  ret <4 x float> %res
3075}
3076declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
3077
3078
; Lowering test: @llvm.x86.avx.vpermilvar.ps.256 with an <8 x i32> control
; vector in a register is expected to become the register form of vpermilps
; on ymm registers.
3079define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
3080; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
3081; CHECK:       # BB#0:
3082; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
3083; CHECK-NEXT:    retl
3084  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
3085  ret <8 x float> %res
3086}
3087declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
3088
3089
; Calls llvm.x86.avx.vtestc.pd on two <2 x double> operands and returns the
; i32 result. The expected output is vtestpd on xmm registers, after which the
; carry flag is turned into 0/1 in eax via "sbbl %eax, %eax" + "andl $1, %eax".
3090define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
3091; CHECK-LABEL: test_x86_avx_vtestc_pd:
3092; CHECK:       # BB#0:
3093; CHECK-NEXT:    vtestpd %xmm1, %xmm0
3094; CHECK-NEXT:    sbbl %eax, %eax
3095; CHECK-NEXT:    andl $1, %eax
3096; CHECK-NEXT:    retl
3097  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3098  ret i32 %res
3099}
3100declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
3101
3102
; 256-bit variant: calls llvm.x86.avx.vtestc.pd.256 on two <4 x double>
; operands. The expected output is vtestpd on ymm registers, the carry flag
; materialized as 0/1 in eax with sbbl + andl, and a vzeroupper before the
; return.
3103define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3104; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
3105; CHECK:       # BB#0:
3106; CHECK-NEXT:    vtestpd %ymm1, %ymm0
3107; CHECK-NEXT:    sbbl %eax, %eax
3108; CHECK-NEXT:    andl $1, %eax
3109; CHECK-NEXT:    vzeroupper
3110; CHECK-NEXT:    retl
3111  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3112  ret i32 %res
3113}
3114declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
3115
3116
; Calls llvm.x86.avx.vtestc.ps on two <4 x float> operands and returns the
; i32 result. The expected output is vtestps on xmm registers, after which the
; carry flag is turned into 0/1 in eax via "sbbl %eax, %eax" + "andl $1, %eax".
3117define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
3118; CHECK-LABEL: test_x86_avx_vtestc_ps:
3119; CHECK:       # BB#0:
3120; CHECK-NEXT:    vtestps %xmm1, %xmm0
3121; CHECK-NEXT:    sbbl %eax, %eax
3122; CHECK-NEXT:    andl $1, %eax
3123; CHECK-NEXT:    retl
3124  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3125  ret i32 %res
3126}
3127declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
3128
3129
; 256-bit variant: calls llvm.x86.avx.vtestc.ps.256 on two <8 x float>
; operands. The expected output is vtestps on ymm registers, the carry flag
; materialized as 0/1 in eax with sbbl + andl, and a vzeroupper before the
; return.
3130define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3131; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
3132; CHECK:       # BB#0:
3133; CHECK-NEXT:    vtestps %ymm1, %ymm0
3134; CHECK-NEXT:    sbbl %eax, %eax
3135; CHECK-NEXT:    andl $1, %eax
3136; CHECK-NEXT:    vzeroupper
3137; CHECK-NEXT:    retl
3138  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3139  ret i32 %res
3140}
3141declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
3142
3143
; Calls llvm.x86.avx.vtestnzc.pd on two <2 x double> operands and returns the
; i32 result. The expected output is vtestpd on xmm registers followed by
; seta (set when both CF and ZF are clear) and a zero-extension into eax.
3144define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
3145; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
3146; CHECK:       # BB#0:
3147; CHECK-NEXT:    vtestpd %xmm1, %xmm0
3148; CHECK-NEXT:    seta %al
3149; CHECK-NEXT:    movzbl %al, %eax
3150; CHECK-NEXT:    retl
3151  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3152  ret i32 %res
3153}
3154declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
3155
3156
; 256-bit variant: calls llvm.x86.avx.vtestnzc.pd.256 on two <4 x double>
; operands. The expected output is vtestpd on ymm registers, seta + movzbl to
; produce the i32 result in eax, and a vzeroupper before the return.
3157define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
3158; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
3159; CHECK:       # BB#0:
3160; CHECK-NEXT:    vtestpd %ymm1, %ymm0
3161; CHECK-NEXT:    seta %al
3162; CHECK-NEXT:    movzbl %al, %eax
3163; CHECK-NEXT:    vzeroupper
3164; CHECK-NEXT:    retl
3165  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3166  ret i32 %res
3167}
3168declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
3169
3170
; Calls llvm.x86.avx.vtestnzc.ps on two <4 x float> operands and returns the
; i32 result. The expected output is vtestps on xmm registers followed by
; seta (set when both CF and ZF are clear) and a zero-extension into eax.
3171define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
3172; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
3173; CHECK:       # BB#0:
3174; CHECK-NEXT:    vtestps %xmm1, %xmm0
3175; CHECK-NEXT:    seta %al
3176; CHECK-NEXT:    movzbl %al, %eax
3177; CHECK-NEXT:    retl
3178  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3179  ret i32 %res
3180}
3181declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
3182
3183
; 256-bit variant: calls llvm.x86.avx.vtestnzc.ps.256 on two <8 x float>
; operands. The expected output is vtestps on ymm registers, seta + movzbl to
; produce the i32 result in eax, and a vzeroupper before the return.
3184define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
3185; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
3186; CHECK:       # BB#0:
3187; CHECK-NEXT:    vtestps %ymm1, %ymm0
3188; CHECK-NEXT:    seta %al
3189; CHECK-NEXT:    movzbl %al, %eax
3190; CHECK-NEXT:    vzeroupper
3191; CHECK-NEXT:    retl
3192  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3193  ret i32 %res
3194}
3195declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
3196
3197
; Calls llvm.x86.avx.vtestz.pd on two <2 x double> operands and returns the
; i32 result. The expected output is vtestpd on xmm registers followed by
; sete (set when ZF is set) and a zero-extension into eax.
3198define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
3199; CHECK-LABEL: test_x86_avx_vtestz_pd:
3200; CHECK:       # BB#0:
3201; CHECK-NEXT:    vtestpd %xmm1, %xmm0
3202; CHECK-NEXT:    sete %al
3203; CHECK-NEXT:    movzbl %al, %eax
3204; CHECK-NEXT:    retl
3205  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
3206  ret i32 %res
3207}
3208declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
3209
3210
; 256-bit variant: calls llvm.x86.avx.vtestz.pd.256 on two <4 x double>
; operands. The expected output is vtestpd on ymm registers, sete + movzbl to
; produce the i32 result in eax, and a vzeroupper before the return.
3211define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
3212; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
3213; CHECK:       # BB#0:
3214; CHECK-NEXT:    vtestpd %ymm1, %ymm0
3215; CHECK-NEXT:    sete %al
3216; CHECK-NEXT:    movzbl %al, %eax
3217; CHECK-NEXT:    vzeroupper
3218; CHECK-NEXT:    retl
3219  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
3220  ret i32 %res
3221}
3222declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
3223
3224
; Calls llvm.x86.avx.vtestz.ps on two <4 x float> operands and returns the
; i32 result. The expected output is vtestps on xmm registers followed by
; sete (set when ZF is set) and a zero-extension into eax.
3225define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
3226; CHECK-LABEL: test_x86_avx_vtestz_ps:
3227; CHECK:       # BB#0:
3228; CHECK-NEXT:    vtestps %xmm1, %xmm0
3229; CHECK-NEXT:    sete %al
3230; CHECK-NEXT:    movzbl %al, %eax
3231; CHECK-NEXT:    retl
3232  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
3233  ret i32 %res
3234}
3235declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
3236
3237
; 256-bit variant: calls llvm.x86.avx.vtestz.ps.256 on two <8 x float>
; operands. The expected output is vtestps on ymm registers, sete + movzbl to
; produce the i32 result in eax, and a vzeroupper before the return.
3238define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
3239; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
3240; CHECK:       # BB#0:
3241; CHECK-NEXT:    vtestps %ymm1, %ymm0
3242; CHECK-NEXT:    sete %al
3243; CHECK-NEXT:    movzbl %al, %eax
3244; CHECK-NEXT:    vzeroupper
3245; CHECK-NEXT:    retl
3246  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
3247  ret i32 %res
3248}
3249declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
3250
3251
; Calls the argument-less llvm.x86.avx.vzeroall intrinsic. The expected
; output is a vzeroall instruction, then a vzeroupper, then the return.
; NOTE(review): the vzeroupper right after vzeroall looks redundant; the
; assertions below simply record what llc currently emits -- confirm whether
; that is intended.
3252define void @test_x86_avx_vzeroall() {
3253; CHECK-LABEL: test_x86_avx_vzeroall:
3254; CHECK:       # BB#0:
3255; CHECK-NEXT:    vzeroall
3256; CHECK-NEXT:    vzeroupper
3257; CHECK-NEXT:    retl
3258  call void @llvm.x86.avx.vzeroall()
3259  ret void
3260}
3261declare void @llvm.x86.avx.vzeroall() nounwind
3262
3263
; Calls the argument-less llvm.x86.avx.vzeroupper intrinsic. The expected
; output contains two consecutive vzeroupper instructions before the return.
; NOTE(review): presumably one comes from the intrinsic itself and the other is
; inserted by the backend before the return; the second looks redundant --
; the assertions record current llc output, confirm whether that is intended.
3264define void @test_x86_avx_vzeroupper() {
3265; CHECK-LABEL: test_x86_avx_vzeroupper:
3266; CHECK:       # BB#0:
3267; CHECK-NEXT:    vzeroupper
3268; CHECK-NEXT:    vzeroupper
3269; CHECK-NEXT:    retl
3270  call void @llvm.x86.avx.vzeroupper()
3271  ret void
3272}
3273declare void @llvm.x86.avx.vzeroupper() nounwind
3274
3275; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
3276
; Tail-calls llvm.x86.sse3.monitor with a pointer and two i32 arguments. The
; expected output loads the three stack arguments into edx, ecx and eax and
; then issues the (non-VEX) monitor instruction.
; NOTE(review): the "leal (%eax), %eax" between the loads and monitor looks
; like a no-op address computation; the assertions record current llc
; output -- confirm whether it is intended.
3277define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
3278; CHECK-LABEL: monitor:
3279; CHECK:       # BB#0:
3280; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
3281; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3282; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3283; CHECK-NEXT:    leal (%eax), %eax
3284; CHECK-NEXT:    monitor
3285; CHECK-NEXT:    retl
3286  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
3287  ret void
3288}
3289declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
3290
; Tail-calls llvm.x86.sse3.mwait with two i32 arguments. The expected output
; loads the two stack arguments into ecx and eax and then issues the
; (non-VEX) mwait instruction.
3291define void @mwait(i32 %E, i32 %H) nounwind {
3292; CHECK-LABEL: mwait:
3293; CHECK:       # BB#0:
3294; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
3295; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3296; CHECK-NEXT:    mwait
3297; CHECK-NEXT:    retl
3298  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
3299  ret void
3300}
3301declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
3302
; Tail-calls llvm.x86.sse.sfence; the expected output is a plain sfence
; instruction followed by the return.
3303define void @sfence() nounwind {
3304; CHECK-LABEL: sfence:
3305; CHECK:       # BB#0:
3306; CHECK-NEXT:    sfence
3307; CHECK-NEXT:    retl
3308  tail call void @llvm.x86.sse.sfence()
3309  ret void
3310}
3311declare void @llvm.x86.sse.sfence() nounwind
3312
; Tail-calls llvm.x86.sse2.lfence; the expected output is a plain lfence
; instruction followed by the return.
3313define void @lfence() nounwind {
3314; CHECK-LABEL: lfence:
3315; CHECK:       # BB#0:
3316; CHECK-NEXT:    lfence
3317; CHECK-NEXT:    retl
3318  tail call void @llvm.x86.sse2.lfence()
3319  ret void
3320}
3321declare void @llvm.x86.sse2.lfence() nounwind
3322
; Tail-calls llvm.x86.sse2.mfence; the expected output is a plain mfence
; instruction followed by the return.
3323define void @mfence() nounwind {
3324; CHECK-LABEL: mfence:
3325; CHECK:       # BB#0:
3326; CHECK-NEXT:    mfence
3327; CHECK-NEXT:    retl
3328  tail call void @llvm.x86.sse2.mfence()
3329  ret void
3330}
3331declare void @llvm.x86.sse2.mfence() nounwind
3332
; Tail-calls llvm.x86.sse2.clflush on the pointer argument. The expected
; output loads the pointer from the stack into eax and issues clflush on the
; memory it addresses.
3333define void @clflush(i8* %p) nounwind {
3334; CHECK-LABEL: clflush:
3335; CHECK:       # BB#0:
3336; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3337; CHECK-NEXT:    clflush (%eax)
3338; CHECK-NEXT:    retl
3339  tail call void @llvm.x86.sse2.clflush(i8* %p)
3340  ret void
3341}
3342declare void @llvm.x86.sse2.clflush(i8*) nounwind
3343
; Calls llvm.x86.sse42.crc32.32.8 with an i32 accumulator and an i8 data
; value. The expected output loads the accumulator from the stack into eax and
; uses crc32b with the data operand read directly from its stack slot.
3344define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
3345; CHECK-LABEL: crc32_32_8:
3346; CHECK:       # BB#0:
3347; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3348; CHECK-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
3349; CHECK-NEXT:    retl
3350  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
3351  ret i32 %tmp
3352}
3353declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
3354
; Calls llvm.x86.sse42.crc32.32.16 with an i32 accumulator and an i16 data
; value. The expected output loads the accumulator from the stack into eax and
; uses crc32w with the data operand read directly from its stack slot.
3355define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
3356; CHECK-LABEL: crc32_32_16:
3357; CHECK:       # BB#0:
3358; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3359; CHECK-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
3360; CHECK-NEXT:    retl
3361  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
3362  ret i32 %tmp
3363}
3364declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
3365
; Calls llvm.x86.sse42.crc32.32.32 with an i32 accumulator and an i32 data
; value. The expected output loads the accumulator from the stack into eax and
; uses crc32l with the data operand read directly from its stack slot.
3366define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
3367; CHECK-LABEL: crc32_32_32:
3368; CHECK:       # BB#0:
3369; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3370; CHECK-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
3371; CHECK-NEXT:    retl
3372  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
3373  ret i32 %tmp
3374}
3375declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
3376
; Non-temporal 256-bit integer store. Adds <1, 1> to %a1, widens the sum to
; <4 x i64> (upper two lanes undef) and passes it to
; llvm.x86.avx.movnt.dq.256. The expected output is a vpaddq against a
; constant-pool operand, a vmovntdq of ymm0 to the pointer loaded from the
; stack, and a vzeroupper before the return.
;
; The constant-pool label is matched with a regex instead of a literal
; LCPI<N>_0: the number in the label is assigned per function by the backend,
; so a literal value breaks whenever a test is added or removed earlier in the
; file.
;
; NOTE(review): the integer add presumably plays the same role as the fadd in
; movnt_pd (forcing the execution domain so the integer store form is
; selected) -- confirm.
3377define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
3378; CHECK-LABEL: movnt_dq:
3379; CHECK:       # BB#0:
3380; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3381; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
3382; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
3383; CHECK-NEXT:    vzeroupper
3384; CHECK-NEXT:    retl
3385  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
3386  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
3387  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
3388  ret void
3389}
3390declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
3391
; Non-temporal 256-bit single-precision store: tail-calls
; llvm.x86.avx.movnt.ps.256 with the pointer and the <8 x float> argument.
; The expected output loads the pointer from the stack into eax, stores ymm0
; with vmovntps, and issues a vzeroupper before the return.
3392define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
3393; CHECK-LABEL: movnt_ps:
3394; CHECK:       # BB#0:
3395; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3396; CHECK-NEXT:    vmovntps %ymm0, (%eax)
3397; CHECK-NEXT:    vzeroupper
3398; CHECK-NEXT:    retl
3399  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
3400  ret void
3401}
3402declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
3403
; Non-temporal 256-bit double-precision store: adds a zero vector to %a1 and
; passes the sum to llvm.x86.avx.movnt.pd.256. The expected output zeroes
; ymm1 with vxorpd, performs the vaddpd, stores ymm0 with vmovntpd to the
; pointer loaded from the stack, and issues a vzeroupper before the return.
3404define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
3405  ; add operation forces the execution domain.
3406; CHECK-LABEL: movnt_pd:
3407; CHECK:       # BB#0:
3408; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
3409; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
3410; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
3411; CHECK-NEXT:    vmovntpd %ymm0, (%eax)
3412; CHECK-NEXT:    vzeroupper
3413; CHECK-NEXT:    retl
3414  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
3415  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
3416  ret void
3417}
3418declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
3419
3420
3421; Check for pclmulqdq
; Calls llvm.x86.pclmulqdq on two <2 x i64> operands with an immediate of 0.
; The expected output is the VEX-encoded vpclmulqdq with the same immediate
; ($0) on xmm registers, followed by the return.
3422define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
3423; CHECK-LABEL: test_x86_pclmulqdq:
3424; CHECK:       # BB#0:
3425; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
3426; CHECK-NEXT:    retl
3427  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
3428  ret <2 x i64> %res
3429}
3430declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
3431