1; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
4
; Calls @llvm.x86.sse2.add.sd on %a0 and %a1 and returns the result.
; The checks expect addsd under the SSE prefix and vaddsd under the KNL prefix.
5define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
6; SSE-LABEL: test_x86_sse2_add_sd:
7; SSE:       ## BB#0:
8; SSE-NEXT:    addsd %xmm1, %xmm0
9; SSE-NEXT:    retl
10;
11; KNL-LABEL: test_x86_sse2_add_sd:
12; KNL:       ## BB#0:
13; KNL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
14; KNL-NEXT:    retl
15  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
16  ret <2 x double> %res
17}
18declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
19
20
; Calls @llvm.x86.sse2.cmp.pd on %a0 and %a1 with the constant i8 7 as third operand.
; The checks expect that call to be emitted as cmpordpd (SSE prefix) / vcmpordpd (KNL prefix).
21define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
22; SSE-LABEL: test_x86_sse2_cmp_pd:
23; SSE:       ## BB#0:
24; SSE-NEXT:    cmpordpd %xmm1, %xmm0
25; SSE-NEXT:    retl
26;
27; KNL-LABEL: test_x86_sse2_cmp_pd:
28; KNL:       ## BB#0:
29; KNL-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
30; KNL-NEXT:    retl
31  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
32  ret <2 x double> %res
33}
34declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
35
36
; Calls @llvm.x86.sse2.cmp.sd on %a0 and %a1 with the constant i8 7 as third operand.
; The checks expect that call to be emitted as cmpordsd (SSE prefix) / vcmpordsd (KNL prefix).
37define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
38; SSE-LABEL: test_x86_sse2_cmp_sd:
39; SSE:       ## BB#0:
40; SSE-NEXT:    cmpordsd %xmm1, %xmm0
41; SSE-NEXT:    retl
42;
43; KNL-LABEL: test_x86_sse2_cmp_sd:
44; KNL:       ## BB#0:
45; KNL-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
46; KNL-NEXT:    retl
47  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
48  ret <2 x double> %res
49}
50declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
51
52
; Calls @llvm.x86.sse2.comieq.sd on %a0 and %a1 and returns its i32 result.
; The checks expect (v)comisd, then setnp and sete combined with andb, and a
; final movzbl of the combined byte into %eax.
53define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
54; SSE-LABEL: test_x86_sse2_comieq_sd:
55; SSE:       ## BB#0:
56; SSE-NEXT:    comisd %xmm1, %xmm0
57; SSE-NEXT:    setnp %al
58; SSE-NEXT:    sete %cl
59; SSE-NEXT:    andb %al, %cl
60; SSE-NEXT:    movzbl %cl, %eax
61; SSE-NEXT:    retl
62;
63; KNL-LABEL: test_x86_sse2_comieq_sd:
64; KNL:       ## BB#0:
65; KNL-NEXT:    vcomisd %xmm1, %xmm0
66; KNL-NEXT:    setnp %al
67; KNL-NEXT:    sete %cl
68; KNL-NEXT:    andb %al, %cl
69; KNL-NEXT:    movzbl %cl, %eax
70; KNL-NEXT:    retl
71  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
72  ret i32 %res
73}
74declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
75
76
; Calls @llvm.x86.sse2.comige.sd on %a0 and %a1 and returns its i32 result.
; The checks expect %eax to be zeroed with xorl before (v)comisd %xmm1, %xmm0,
; and the result to be produced with setae %al.
77define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
78; SSE-LABEL: test_x86_sse2_comige_sd:
79; SSE:       ## BB#0:
80; SSE-NEXT:    xorl %eax, %eax
81; SSE-NEXT:    comisd %xmm1, %xmm0
82; SSE-NEXT:    setae %al
83; SSE-NEXT:    retl
84;
85; KNL-LABEL: test_x86_sse2_comige_sd:
86; KNL:       ## BB#0:
87; KNL-NEXT:    xorl %eax, %eax
88; KNL-NEXT:    vcomisd %xmm1, %xmm0
89; KNL-NEXT:    setae %al
90; KNL-NEXT:    retl
91  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
92  ret i32 %res
93}
94declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
95
96
; Calls @llvm.x86.sse2.comigt.sd on %a0 and %a1 and returns its i32 result.
; The checks expect %eax to be zeroed with xorl before (v)comisd %xmm1, %xmm0,
; and the result to be produced with seta %al.
97define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
98; SSE-LABEL: test_x86_sse2_comigt_sd:
99; SSE:       ## BB#0:
100; SSE-NEXT:    xorl %eax, %eax
101; SSE-NEXT:    comisd %xmm1, %xmm0
102; SSE-NEXT:    seta %al
103; SSE-NEXT:    retl
104;
105; KNL-LABEL: test_x86_sse2_comigt_sd:
106; KNL:       ## BB#0:
107; KNL-NEXT:    xorl %eax, %eax
108; KNL-NEXT:    vcomisd %xmm1, %xmm0
109; KNL-NEXT:    seta %al
110; KNL-NEXT:    retl
111  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
112  ret i32 %res
113}
114declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
115
116
; Calls @llvm.x86.sse2.comile.sd on %a0 and %a1 and returns its i32 result.
; The checks expect the (v)comisd operands in the order %xmm0, %xmm1 (the
; reverse of the order expected in the comige test) followed by setae %al.
117define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
118; SSE-LABEL: test_x86_sse2_comile_sd:
119; SSE:       ## BB#0:
120; SSE-NEXT:    xorl %eax, %eax
121; SSE-NEXT:    comisd %xmm0, %xmm1
122; SSE-NEXT:    setae %al
123; SSE-NEXT:    retl
124;
125; KNL-LABEL: test_x86_sse2_comile_sd:
126; KNL:       ## BB#0:
127; KNL-NEXT:    xorl %eax, %eax
128; KNL-NEXT:    vcomisd %xmm0, %xmm1
129; KNL-NEXT:    setae %al
130; KNL-NEXT:    retl
131  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
132  ret i32 %res
133}
134declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
135
136
; Calls @llvm.x86.sse2.comilt.sd on %a0 and %a1 and returns its i32 result.
; The checks expect the (v)comisd operands in the order %xmm0, %xmm1 (the
; reverse of the order expected in the comigt test) followed by seta %al.
137define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
138; SSE-LABEL: test_x86_sse2_comilt_sd:
139; SSE:       ## BB#0:
140; SSE-NEXT:    xorl %eax, %eax
141; SSE-NEXT:    comisd %xmm0, %xmm1
142; SSE-NEXT:    seta %al
143; SSE-NEXT:    retl
144;
145; KNL-LABEL: test_x86_sse2_comilt_sd:
146; KNL:       ## BB#0:
147; KNL-NEXT:    xorl %eax, %eax
148; KNL-NEXT:    vcomisd %xmm0, %xmm1
149; KNL-NEXT:    seta %al
150; KNL-NEXT:    retl
151  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
152  ret i32 %res
153}
154declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
155
156
; Calls @llvm.x86.sse2.comineq.sd on %a0 and %a1 and returns its i32 result.
; The checks expect (v)comisd, then setp and setne combined with orb, and a
; final movzbl of the combined byte into %eax.
157define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
158; SSE-LABEL: test_x86_sse2_comineq_sd:
159; SSE:       ## BB#0:
160; SSE-NEXT:    comisd %xmm1, %xmm0
161; SSE-NEXT:    setp %al
162; SSE-NEXT:    setne %cl
163; SSE-NEXT:    orb %al, %cl
164; SSE-NEXT:    movzbl %cl, %eax
165; SSE-NEXT:    retl
166;
167; KNL-LABEL: test_x86_sse2_comineq_sd:
168; KNL:       ## BB#0:
169; KNL-NEXT:    vcomisd %xmm1, %xmm0
170; KNL-NEXT:    setp %al
171; KNL-NEXT:    setne %cl
172; KNL-NEXT:    orb %al, %cl
173; KNL-NEXT:    movzbl %cl, %eax
174; KNL-NEXT:    retl
175  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
176  ret i32 %res
177}
178declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
179
180
; Calls @llvm.x86.sse2.cvtdq2ps on the <4 x i32> argument and returns the <4 x float> result.
; The checks expect cvtdq2ps under the SSE prefix and vcvtdq2ps under the KNL prefix.
181define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
182; SSE-LABEL: test_x86_sse2_cvtdq2ps:
183; SSE:       ## BB#0:
184; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
185; SSE-NEXT:    retl
186;
187; KNL-LABEL: test_x86_sse2_cvtdq2ps:
188; KNL:       ## BB#0:
189; KNL-NEXT:    vcvtdq2ps %xmm0, %xmm0
190; KNL-NEXT:    retl
191  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
192  ret <4 x float> %res
193}
194declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
195
196
; Calls @llvm.x86.sse2.cvtpd2dq on the <2 x double> argument and returns the <4 x i32> result.
; The checks expect cvtpd2dq under the SSE prefix and vcvtpd2dq under the KNL prefix.
197define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
198; SSE-LABEL: test_x86_sse2_cvtpd2dq:
199; SSE:       ## BB#0:
200; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0
201; SSE-NEXT:    retl
202;
203; KNL-LABEL: test_x86_sse2_cvtpd2dq:
204; KNL:       ## BB#0:
205; KNL-NEXT:    vcvtpd2dq %xmm0, %xmm0
206; KNL-NEXT:    retl
207  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
208  ret <4 x i32> %res
209}
210declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
211
212
; Calls @llvm.x86.sse2.cvtpd2ps on the <2 x double> argument and returns the <4 x float> result.
; The checks expect cvtpd2ps under the SSE prefix and vcvtpd2ps under the KNL prefix.
213define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
214; SSE-LABEL: test_x86_sse2_cvtpd2ps:
215; SSE:       ## BB#0:
216; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
217; SSE-NEXT:    retl
218;
219; KNL-LABEL: test_x86_sse2_cvtpd2ps:
220; KNL:       ## BB#0:
221; KNL-NEXT:    vcvtpd2ps %xmm0, %xmm0
222; KNL-NEXT:    retl
223  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
224  ret <4 x float> %res
225}
226declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
227
228
; Calls @llvm.x86.sse2.cvtps2dq on the <4 x float> argument and returns the <4 x i32> result.
; The checks expect cvtps2dq under the SSE prefix and vcvtps2dq under the KNL prefix.
229define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
230; SSE-LABEL: test_x86_sse2_cvtps2dq:
231; SSE:       ## BB#0:
232; SSE-NEXT:    cvtps2dq %xmm0, %xmm0
233; SSE-NEXT:    retl
234;
235; KNL-LABEL: test_x86_sse2_cvtps2dq:
236; KNL:       ## BB#0:
237; KNL-NEXT:    vcvtps2dq %xmm0, %xmm0
238; KNL-NEXT:    retl
239  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
240  ret <4 x i32> %res
241}
242declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
243
244
; Calls @llvm.x86.sse2.cvtsd2si on the <2 x double> argument and returns the i32 result.
; The checks expect (v)cvtsd2si from %xmm0 directly into %eax.
245define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
246; SSE-LABEL: test_x86_sse2_cvtsd2si:
247; SSE:       ## BB#0:
248; SSE-NEXT:    cvtsd2si %xmm0, %eax
249; SSE-NEXT:    retl
250;
251; KNL-LABEL: test_x86_sse2_cvtsd2si:
252; KNL:       ## BB#0:
253; KNL-NEXT:    vcvtsd2si %xmm0, %eax
254; KNL-NEXT:    retl
255  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
256  ret i32 %res
257}
258declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
259
260
; Calls @llvm.x86.sse2.cvtsd2ss with a <4 x float> %a0 and a <2 x double> %a1 and
; returns the <4 x float> result.
; The checks expect cvtsd2ss under the SSE prefix and vcvtsd2ss under the KNL prefix.
261define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
262; SSE-LABEL: test_x86_sse2_cvtsd2ss:
263; SSE:       ## BB#0:
264; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0
265; SSE-NEXT:    retl
266;
267; KNL-LABEL: test_x86_sse2_cvtsd2ss:
268; KNL:       ## BB#0:
269; KNL-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
270; KNL-NEXT:    retl
271  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
272  ret <4 x float> %res
273}
274declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
275
276
; Calls @llvm.x86.sse2.cvtsi2sd with a <2 x double> %a0 and an i32 %a1 and returns
; the <2 x double> result.
; The checks expect (v)cvtsi2sdl with an %esp-relative memory operand; the
; displacement is matched by the {{[0-9]+}} regex rather than pinned to a value.
277define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
278; SSE-LABEL: test_x86_sse2_cvtsi2sd:
279; SSE:       ## BB#0:
280; SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
281; SSE-NEXT:    retl
282;
283; KNL-LABEL: test_x86_sse2_cvtsi2sd:
284; KNL:       ## BB#0:
285; KNL-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
286; KNL-NEXT:    retl
287  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
288  ret <2 x double> %res
289}
290declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
291
292
; Calls @llvm.x86.sse2.cvtss2sd with a <2 x double> %a0 and a <4 x float> %a1 and
; returns the <2 x double> result.
; The checks expect cvtss2sd under the SSE prefix and vcvtss2sd under the KNL prefix.
293define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
294; SSE-LABEL: test_x86_sse2_cvtss2sd:
295; SSE:       ## BB#0:
296; SSE-NEXT:    cvtss2sd %xmm1, %xmm0
297; SSE-NEXT:    retl
298;
299; KNL-LABEL: test_x86_sse2_cvtss2sd:
300; KNL:       ## BB#0:
301; KNL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
302; KNL-NEXT:    retl
303  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
304  ret <2 x double> %res
305}
306declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
307
308
; Calls @llvm.x86.sse2.cvttpd2dq on the <2 x double> argument and returns the <4 x i32> result.
; The checks expect cvttpd2dq under the SSE prefix and vcvttpd2dq under the KNL prefix.
309define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
310; SSE-LABEL: test_x86_sse2_cvttpd2dq:
311; SSE:       ## BB#0:
312; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
313; SSE-NEXT:    retl
314;
315; KNL-LABEL: test_x86_sse2_cvttpd2dq:
316; KNL:       ## BB#0:
317; KNL-NEXT:    vcvttpd2dq %xmm0, %xmm0
318; KNL-NEXT:    retl
319  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
320  ret <4 x i32> %res
321}
322declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
323
324
; Calls @llvm.x86.sse2.cvttsd2si on the <2 x double> argument and returns the i32 result.
; The checks expect (v)cvttsd2si from %xmm0 directly into %eax.
325define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
326; SSE-LABEL: test_x86_sse2_cvttsd2si:
327; SSE:       ## BB#0:
328; SSE-NEXT:    cvttsd2si %xmm0, %eax
329; SSE-NEXT:    retl
330;
331; KNL-LABEL: test_x86_sse2_cvttsd2si:
332; KNL:       ## BB#0:
333; KNL-NEXT:    vcvttsd2si %xmm0, %eax
334; KNL-NEXT:    retl
335  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
336  ret i32 %res
337}
338declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
339
340
; Calls @llvm.x86.sse2.div.sd on %a0 and %a1 and returns the result.
; The checks expect divsd under the SSE prefix and vdivsd under the KNL prefix.
341define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
342; SSE-LABEL: test_x86_sse2_div_sd:
343; SSE:       ## BB#0:
344; SSE-NEXT:    divsd %xmm1, %xmm0
345; SSE-NEXT:    retl
346;
347; KNL-LABEL: test_x86_sse2_div_sd:
348; KNL:       ## BB#0:
349; KNL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
350; KNL-NEXT:    retl
351  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
352  ret <2 x double> %res
353}
354declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
355
356
357
; Calls @llvm.x86.sse2.max.pd on %a0 and %a1 and returns the result.
; The checks expect maxpd under the SSE prefix and vmaxpd under the KNL prefix.
358define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
359; SSE-LABEL: test_x86_sse2_max_pd:
360; SSE:       ## BB#0:
361; SSE-NEXT:    maxpd %xmm1, %xmm0
362; SSE-NEXT:    retl
363;
364; KNL-LABEL: test_x86_sse2_max_pd:
365; KNL:       ## BB#0:
366; KNL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
367; KNL-NEXT:    retl
368  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
369  ret <2 x double> %res
370}
371declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
372
373
; Calls @llvm.x86.sse2.max.sd on %a0 and %a1 and returns the result.
; The checks expect maxsd under the SSE prefix and vmaxsd under the KNL prefix.
374define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
375; SSE-LABEL: test_x86_sse2_max_sd:
376; SSE:       ## BB#0:
377; SSE-NEXT:    maxsd %xmm1, %xmm0
378; SSE-NEXT:    retl
379;
380; KNL-LABEL: test_x86_sse2_max_sd:
381; KNL:       ## BB#0:
382; KNL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
383; KNL-NEXT:    retl
384  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
385  ret <2 x double> %res
386}
387declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
388
389
; Calls @llvm.x86.sse2.min.pd on %a0 and %a1 and returns the result.
; The checks expect minpd under the SSE prefix and vminpd under the KNL prefix.
390define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
391; SSE-LABEL: test_x86_sse2_min_pd:
392; SSE:       ## BB#0:
393; SSE-NEXT:    minpd %xmm1, %xmm0
394; SSE-NEXT:    retl
395;
396; KNL-LABEL: test_x86_sse2_min_pd:
397; KNL:       ## BB#0:
398; KNL-NEXT:    vminpd %xmm1, %xmm0, %xmm0
399; KNL-NEXT:    retl
400  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
401  ret <2 x double> %res
402}
403declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
404
405
; Calls @llvm.x86.sse2.min.sd on %a0 and %a1 and returns the result.
; The checks expect minsd under the SSE prefix and vminsd under the KNL prefix.
406define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
407; SSE-LABEL: test_x86_sse2_min_sd:
408; SSE:       ## BB#0:
409; SSE-NEXT:    minsd %xmm1, %xmm0
410; SSE-NEXT:    retl
411;
412; KNL-LABEL: test_x86_sse2_min_sd:
413; KNL:       ## BB#0:
414; KNL-NEXT:    vminsd %xmm1, %xmm0, %xmm0
415; KNL-NEXT:    retl
416  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
417  ret <2 x double> %res
418}
419declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
420
421
; Calls @llvm.x86.sse2.movmsk.pd on the <2 x double> argument and returns the i32 result.
; The checks expect (v)movmskpd from %xmm0 directly into %eax.
422define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
423; SSE-LABEL: test_x86_sse2_movmsk_pd:
424; SSE:       ## BB#0:
425; SSE-NEXT:    movmskpd %xmm0, %eax
426; SSE-NEXT:    retl
427;
428; KNL-LABEL: test_x86_sse2_movmsk_pd:
429; KNL:       ## BB#0:
430; KNL-NEXT:    vmovmskpd %xmm0, %eax
431; KNL-NEXT:    retl
432  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
433  ret i32 %res
434}
435declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
436
437
438
439
; Calls @llvm.x86.sse2.mul.sd on %a0 and %a1 and returns the result.
; The checks expect mulsd under the SSE prefix and vmulsd under the KNL prefix.
440define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
441; SSE-LABEL: test_x86_sse2_mul_sd:
442; SSE:       ## BB#0:
443; SSE-NEXT:    mulsd %xmm1, %xmm0
444; SSE-NEXT:    retl
445;
446; KNL-LABEL: test_x86_sse2_mul_sd:
447; KNL:       ## BB#0:
448; KNL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
449; KNL-NEXT:    retl
450  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
451  ret <2 x double> %res
452}
453declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
454
455
; Calls @llvm.x86.sse2.packssdw.128 on two <4 x i32> arguments and returns the <8 x i16> result.
; The checks expect packssdw under the SSE prefix and vpackssdw under the KNL prefix.
456define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
457; SSE-LABEL: test_x86_sse2_packssdw_128:
458; SSE:       ## BB#0:
459; SSE-NEXT:    packssdw %xmm1, %xmm0
460; SSE-NEXT:    retl
461;
462; KNL-LABEL: test_x86_sse2_packssdw_128:
463; KNL:       ## BB#0:
464; KNL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
465; KNL-NEXT:    retl
466  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
467  ret <8 x i16> %res
468}
469declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
470
471
; Calls @llvm.x86.sse2.packsswb.128 on two <8 x i16> arguments and returns the <16 x i8> result.
; The checks expect packsswb under the SSE prefix and vpacksswb under the KNL prefix.
472define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
473; SSE-LABEL: test_x86_sse2_packsswb_128:
474; SSE:       ## BB#0:
475; SSE-NEXT:    packsswb %xmm1, %xmm0
476; SSE-NEXT:    retl
477;
478; KNL-LABEL: test_x86_sse2_packsswb_128:
479; KNL:       ## BB#0:
480; KNL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
481; KNL-NEXT:    retl
482  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
483  ret <16 x i8> %res
484}
485declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
486
487
; Calls @llvm.x86.sse2.packuswb.128 on two <8 x i16> arguments and returns the <16 x i8> result.
; The checks expect packuswb under the SSE prefix and vpackuswb under the KNL prefix.
488define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
489; SSE-LABEL: test_x86_sse2_packuswb_128:
490; SSE:       ## BB#0:
491; SSE-NEXT:    packuswb %xmm1, %xmm0
492; SSE-NEXT:    retl
493;
494; KNL-LABEL: test_x86_sse2_packuswb_128:
495; KNL:       ## BB#0:
496; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
497; KNL-NEXT:    retl
498  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
499  ret <16 x i8> %res
500}
501declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
502
503
; Calls @llvm.x86.sse2.padds.b on two <16 x i8> arguments and returns the result.
; The checks expect paddsb under the SSE prefix and vpaddsb under the KNL prefix.
504define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
505; SSE-LABEL: test_x86_sse2_padds_b:
506; SSE:       ## BB#0:
507; SSE-NEXT:    paddsb %xmm1, %xmm0
508; SSE-NEXT:    retl
509;
510; KNL-LABEL: test_x86_sse2_padds_b:
511; KNL:       ## BB#0:
512; KNL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
513; KNL-NEXT:    retl
514  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
515  ret <16 x i8> %res
516}
517declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
518
519
; Calls @llvm.x86.sse2.padds.w on two <8 x i16> arguments and returns the result.
; The checks expect paddsw under the SSE prefix and vpaddsw under the KNL prefix.
520define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
521; SSE-LABEL: test_x86_sse2_padds_w:
522; SSE:       ## BB#0:
523; SSE-NEXT:    paddsw %xmm1, %xmm0
524; SSE-NEXT:    retl
525;
526; KNL-LABEL: test_x86_sse2_padds_w:
527; KNL:       ## BB#0:
528; KNL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
529; KNL-NEXT:    retl
530  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
531  ret <8 x i16> %res
532}
533declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
534
535
; Calls @llvm.x86.sse2.paddus.b on two <16 x i8> arguments and returns the result.
; The checks expect paddusb under the SSE prefix and vpaddusb under the KNL prefix.
536define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
537; SSE-LABEL: test_x86_sse2_paddus_b:
538; SSE:       ## BB#0:
539; SSE-NEXT:    paddusb %xmm1, %xmm0
540; SSE-NEXT:    retl
541;
542; KNL-LABEL: test_x86_sse2_paddus_b:
543; KNL:       ## BB#0:
544; KNL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
545; KNL-NEXT:    retl
546  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
547  ret <16 x i8> %res
548}
549declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
550
551
; Calls @llvm.x86.sse2.paddus.w on two <8 x i16> arguments and returns the result.
; The checks expect paddusw under the SSE prefix and vpaddusw under the KNL prefix.
552define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
553; SSE-LABEL: test_x86_sse2_paddus_w:
554; SSE:       ## BB#0:
555; SSE-NEXT:    paddusw %xmm1, %xmm0
556; SSE-NEXT:    retl
557;
558; KNL-LABEL: test_x86_sse2_paddus_w:
559; KNL:       ## BB#0:
560; KNL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
561; KNL-NEXT:    retl
562  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
563  ret <8 x i16> %res
564}
565declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
566
567
; Calls @llvm.x86.sse2.pavg.b on two <16 x i8> arguments and returns the result.
; The checks expect pavgb under the SSE prefix and vpavgb under the KNL prefix.
568define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
569; SSE-LABEL: test_x86_sse2_pavg_b:
570; SSE:       ## BB#0:
571; SSE-NEXT:    pavgb %xmm1, %xmm0
572; SSE-NEXT:    retl
573;
574; KNL-LABEL: test_x86_sse2_pavg_b:
575; KNL:       ## BB#0:
576; KNL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
577; KNL-NEXT:    retl
578  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
579  ret <16 x i8> %res
580}
581declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
582
583
; Calls @llvm.x86.sse2.pavg.w on two <8 x i16> arguments and returns the result.
; The checks expect pavgw under the SSE prefix and vpavgw under the KNL prefix.
584define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
585; SSE-LABEL: test_x86_sse2_pavg_w:
586; SSE:       ## BB#0:
587; SSE-NEXT:    pavgw %xmm1, %xmm0
588; SSE-NEXT:    retl
589;
590; KNL-LABEL: test_x86_sse2_pavg_w:
591; KNL:       ## BB#0:
592; KNL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
593; KNL-NEXT:    retl
594  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
595  ret <8 x i16> %res
596}
597declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
598
599
; Calls @llvm.x86.sse2.pmadd.wd on two <8 x i16> arguments and returns the <4 x i32> result.
; The checks expect pmaddwd under the SSE prefix and vpmaddwd under the KNL prefix.
600define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
601; SSE-LABEL: test_x86_sse2_pmadd_wd:
602; SSE:       ## BB#0:
603; SSE-NEXT:    pmaddwd %xmm1, %xmm0
604; SSE-NEXT:    retl
605;
606; KNL-LABEL: test_x86_sse2_pmadd_wd:
607; KNL:       ## BB#0:
608; KNL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
609; KNL-NEXT:    retl
610  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
611  ret <4 x i32> %res
612}
613declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
614
615
; Calls @llvm.x86.sse2.pmaxs.w on two <8 x i16> arguments and returns the result.
; The checks expect pmaxsw under the SSE prefix and vpmaxsw under the KNL prefix.
616define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
617; SSE-LABEL: test_x86_sse2_pmaxs_w:
618; SSE:       ## BB#0:
619; SSE-NEXT:    pmaxsw %xmm1, %xmm0
620; SSE-NEXT:    retl
621;
622; KNL-LABEL: test_x86_sse2_pmaxs_w:
623; KNL:       ## BB#0:
624; KNL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
625; KNL-NEXT:    retl
626  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
627  ret <8 x i16> %res
628}
629declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
630
631
; Calls @llvm.x86.sse2.pmaxu.b on two <16 x i8> arguments and returns the result.
; The checks expect pmaxub under the SSE prefix and vpmaxub under the KNL prefix.
632define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
633; SSE-LABEL: test_x86_sse2_pmaxu_b:
634; SSE:       ## BB#0:
635; SSE-NEXT:    pmaxub %xmm1, %xmm0
636; SSE-NEXT:    retl
637;
638; KNL-LABEL: test_x86_sse2_pmaxu_b:
639; KNL:       ## BB#0:
640; KNL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
641; KNL-NEXT:    retl
642  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
643  ret <16 x i8> %res
644}
645declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
646
647
; Calls @llvm.x86.sse2.pmins.w on two <8 x i16> arguments and returns the result.
; The checks expect pminsw under the SSE prefix and vpminsw under the KNL prefix.
648define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
649; SSE-LABEL: test_x86_sse2_pmins_w:
650; SSE:       ## BB#0:
651; SSE-NEXT:    pminsw %xmm1, %xmm0
652; SSE-NEXT:    retl
653;
654; KNL-LABEL: test_x86_sse2_pmins_w:
655; KNL:       ## BB#0:
656; KNL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
657; KNL-NEXT:    retl
658  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
659  ret <8 x i16> %res
660}
661declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
662
663
; Calls @llvm.x86.sse2.pminu.b on two <16 x i8> arguments and returns the result.
; The checks expect pminub under the SSE prefix and vpminub under the KNL prefix.
664define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
665; SSE-LABEL: test_x86_sse2_pminu_b:
666; SSE:       ## BB#0:
667; SSE-NEXT:    pminub %xmm1, %xmm0
668; SSE-NEXT:    retl
669;
670; KNL-LABEL: test_x86_sse2_pminu_b:
671; KNL:       ## BB#0:
672; KNL-NEXT:    vpminub %xmm1, %xmm0, %xmm0
673; KNL-NEXT:    retl
674  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
675  ret <16 x i8> %res
676}
677declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
678
679
; Calls @llvm.x86.sse2.pmovmskb.128 on the <16 x i8> argument and returns the i32 result.
; The checks expect (v)pmovmskb from %xmm0 directly into %eax.
680define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
681; SSE-LABEL: test_x86_sse2_pmovmskb_128:
682; SSE:       ## BB#0:
683; SSE-NEXT:    pmovmskb %xmm0, %eax
684; SSE-NEXT:    retl
685;
686; KNL-LABEL: test_x86_sse2_pmovmskb_128:
687; KNL:       ## BB#0:
688; KNL-NEXT:    vpmovmskb %xmm0, %eax
689; KNL-NEXT:    retl
690  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
691  ret i32 %res
692}
693declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
694
695
; Calls @llvm.x86.sse2.pmulh.w on two <8 x i16> arguments and returns the result.
; The checks expect pmulhw under the SSE prefix and vpmulhw under the KNL prefix.
696define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
697; SSE-LABEL: test_x86_sse2_pmulh_w:
698; SSE:       ## BB#0:
699; SSE-NEXT:    pmulhw %xmm1, %xmm0
700; SSE-NEXT:    retl
701;
702; KNL-LABEL: test_x86_sse2_pmulh_w:
703; KNL:       ## BB#0:
704; KNL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
705; KNL-NEXT:    retl
706  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
707  ret <8 x i16> %res
708}
709declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
710
711
; Calls @llvm.x86.sse2.pmulhu.w on two <8 x i16> arguments and returns the result.
; The checks expect pmulhuw under the SSE prefix and vpmulhuw under the KNL prefix.
712define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
713; SSE-LABEL: test_x86_sse2_pmulhu_w:
714; SSE:       ## BB#0:
715; SSE-NEXT:    pmulhuw %xmm1, %xmm0
716; SSE-NEXT:    retl
717;
718; KNL-LABEL: test_x86_sse2_pmulhu_w:
719; KNL:       ## BB#0:
720; KNL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
721; KNL-NEXT:    retl
722  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
723  ret <8 x i16> %res
724}
725declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
726
727
; Calls @llvm.x86.sse2.pmulu.dq on two <4 x i32> arguments and returns the <2 x i64> result.
; The checks expect pmuludq under the SSE prefix and vpmuludq under the KNL prefix.
728define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
729; SSE-LABEL: test_x86_sse2_pmulu_dq:
730; SSE:       ## BB#0:
731; SSE-NEXT:    pmuludq %xmm1, %xmm0
732; SSE-NEXT:    retl
733;
734; KNL-LABEL: test_x86_sse2_pmulu_dq:
735; KNL:       ## BB#0:
736; KNL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
737; KNL-NEXT:    retl
738  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
739  ret <2 x i64> %res
740}
741declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
742
743
; Calls @llvm.x86.sse2.psad.bw on two <16 x i8> arguments and returns the <2 x i64> result.
; The checks expect psadbw under the SSE prefix and vpsadbw under the KNL prefix.
744define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
745; SSE-LABEL: test_x86_sse2_psad_bw:
746; SSE:       ## BB#0:
747; SSE-NEXT:    psadbw %xmm1, %xmm0
748; SSE-NEXT:    retl
749;
750; KNL-LABEL: test_x86_sse2_psad_bw:
751; KNL:       ## BB#0:
752; KNL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
753; KNL-NEXT:    retl
754  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
755  ret <2 x i64> %res
756}
757declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
758
759
; Calls @llvm.x86.sse2.psll.d on two <4 x i32> arguments and returns the result.
; The checks expect the register-operand form: pslld (SSE prefix) / vpslld (KNL prefix).
760define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
761; SSE-LABEL: test_x86_sse2_psll_d:
762; SSE:       ## BB#0:
763; SSE-NEXT:    pslld %xmm1, %xmm0
764; SSE-NEXT:    retl
765;
766; KNL-LABEL: test_x86_sse2_psll_d:
767; KNL:       ## BB#0:
768; KNL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
769; KNL-NEXT:    retl
770  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
771  ret <4 x i32> %res
772}
773declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
774
775
; Calls @llvm.x86.sse2.psll.q on two <2 x i64> arguments and returns the result.
; The checks expect the register-operand form: psllq (SSE prefix) / vpsllq (KNL prefix).
776define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
777; SSE-LABEL: test_x86_sse2_psll_q:
778; SSE:       ## BB#0:
779; SSE-NEXT:    psllq %xmm1, %xmm0
780; SSE-NEXT:    retl
781;
782; KNL-LABEL: test_x86_sse2_psll_q:
783; KNL:       ## BB#0:
784; KNL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
785; KNL-NEXT:    retl
786  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
787  ret <2 x i64> %res
788}
789declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
790
791
; Calls @llvm.x86.sse2.psll.w on two <8 x i16> arguments and returns the result.
; The checks expect the register-operand form: psllw (SSE prefix) / vpsllw (KNL prefix).
792define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
793; SSE-LABEL: test_x86_sse2_psll_w:
794; SSE:       ## BB#0:
795; SSE-NEXT:    psllw %xmm1, %xmm0
796; SSE-NEXT:    retl
797;
798; KNL-LABEL: test_x86_sse2_psll_w:
799; KNL:       ## BB#0:
800; KNL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
801; KNL-NEXT:    retl
802  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
803  ret <8 x i16> %res
804}
805declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
806
807
; Calls @llvm.x86.sse2.pslli.d on %a0 with the constant i32 7 as second operand.
; The checks expect the immediate form: pslld $7 (SSE prefix) / vpslld $7 (KNL prefix).
808define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
809; SSE-LABEL: test_x86_sse2_pslli_d:
810; SSE:       ## BB#0:
811; SSE-NEXT:    pslld $7, %xmm0
812; SSE-NEXT:    retl
813;
814; KNL-LABEL: test_x86_sse2_pslli_d:
815; KNL:       ## BB#0:
816; KNL-NEXT:    vpslld $7, %xmm0, %xmm0
817; KNL-NEXT:    retl
818  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
819  ret <4 x i32> %res
820}
821declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
822
823
; Calls @llvm.x86.sse2.pslli.q on %a0 with the constant i32 7 as second operand.
; The checks expect the immediate form: psllq $7 (SSE prefix) / vpsllq $7 (KNL prefix).
824define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
825; SSE-LABEL: test_x86_sse2_pslli_q:
826; SSE:       ## BB#0:
827; SSE-NEXT:    psllq $7, %xmm0
828; SSE-NEXT:    retl
829;
830; KNL-LABEL: test_x86_sse2_pslli_q:
831; KNL:       ## BB#0:
832; KNL-NEXT:    vpsllq $7, %xmm0, %xmm0
833; KNL-NEXT:    retl
834  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
835  ret <2 x i64> %res
836}
837declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
838
839
; Calls @llvm.x86.sse2.pslli.w on %a0 with the constant i32 7 as second operand.
; The checks expect the immediate form: psllw $7 (SSE prefix) / vpsllw $7 (KNL prefix).
840define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
841; SSE-LABEL: test_x86_sse2_pslli_w:
842; SSE:       ## BB#0:
843; SSE-NEXT:    psllw $7, %xmm0
844; SSE-NEXT:    retl
845;
846; KNL-LABEL: test_x86_sse2_pslli_w:
847; KNL:       ## BB#0:
848; KNL-NEXT:    vpsllw $7, %xmm0, %xmm0
849; KNL-NEXT:    retl
850  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
851  ret <8 x i16> %res
852}
853declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
854
855
; Calls @llvm.x86.sse2.psra.d on two <4 x i32> arguments and returns the result.
; The checks expect the register-operand form: psrad (SSE prefix) / vpsrad (KNL prefix).
856define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
857; SSE-LABEL: test_x86_sse2_psra_d:
858; SSE:       ## BB#0:
859; SSE-NEXT:    psrad %xmm1, %xmm0
860; SSE-NEXT:    retl
861;
862; KNL-LABEL: test_x86_sse2_psra_d:
863; KNL:       ## BB#0:
864; KNL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
865; KNL-NEXT:    retl
866  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
867  ret <4 x i32> %res
868}
869declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
870
871
; Calls @llvm.x86.sse2.psra.w on two <8 x i16> arguments and returns the result.
; The checks expect the register-operand form: psraw (SSE prefix) / vpsraw (KNL prefix).
872define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
873; SSE-LABEL: test_x86_sse2_psra_w:
874; SSE:       ## BB#0:
875; SSE-NEXT:    psraw %xmm1, %xmm0
876; SSE-NEXT:    retl
877;
878; KNL-LABEL: test_x86_sse2_psra_w:
879; KNL:       ## BB#0:
880; KNL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
881; KNL-NEXT:    retl
882  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
883  ret <8 x i16> %res
884}
885declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
886
887
; Calls @llvm.x86.sse2.psrai.d on %a0 with the constant i32 7 as second operand.
; The checks expect the immediate form: psrad $7 (SSE prefix) / vpsrad $7 (KNL prefix).
888define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
889; SSE-LABEL: test_x86_sse2_psrai_d:
890; SSE:       ## BB#0:
891; SSE-NEXT:    psrad $7, %xmm0
892; SSE-NEXT:    retl
893;
894; KNL-LABEL: test_x86_sse2_psrai_d:
895; KNL:       ## BB#0:
896; KNL-NEXT:    vpsrad $7, %xmm0, %xmm0
897; KNL-NEXT:    retl
898  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
899  ret <4 x i32> %res
900}
901declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
902
903
; Calls @llvm.x86.sse2.psrai.w on %a0 with the constant i32 7 as second operand.
; The checks expect the immediate form: psraw $7 (SSE prefix) / vpsraw $7 (KNL prefix).
904define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
905; SSE-LABEL: test_x86_sse2_psrai_w:
906; SSE:       ## BB#0:
907; SSE-NEXT:    psraw $7, %xmm0
908; SSE-NEXT:    retl
909;
910; KNL-LABEL: test_x86_sse2_psrai_w:
911; KNL:       ## BB#0:
912; KNL-NEXT:    vpsraw $7, %xmm0, %xmm0
913; KNL-NEXT:    retl
914  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
915  ret <8 x i16> %res
916}
917declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
918
919
; Calls @llvm.x86.sse2.psrl.d on two <4 x i32> arguments and returns the result.
; The checks expect the register-operand form: psrld (SSE prefix) / vpsrld (KNL prefix).
920define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
921; SSE-LABEL: test_x86_sse2_psrl_d:
922; SSE:       ## BB#0:
923; SSE-NEXT:    psrld %xmm1, %xmm0
924; SSE-NEXT:    retl
925;
926; KNL-LABEL: test_x86_sse2_psrl_d:
927; KNL:       ## BB#0:
928; KNL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
929; KNL-NEXT:    retl
930  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
931  ret <4 x i32> %res
932}
933declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
934
935
; Codegen test for @llvm.x86.sse2.psrl.q on two <2 x i64> operands.
; The autogenerated checks expect a single psrlq under the SSE prefix and a
; single vpsrlq under the KNL prefix, each followed directly by retl.
936define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
937; SSE-LABEL: test_x86_sse2_psrl_q:
938; SSE:       ## BB#0:
939; SSE-NEXT:    psrlq %xmm1, %xmm0
940; SSE-NEXT:    retl
941;
942; KNL-LABEL: test_x86_sse2_psrl_q:
943; KNL:       ## BB#0:
944; KNL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
945; KNL-NEXT:    retl
946  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
947  ret <2 x i64> %res
948}
949declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
950
951
; Codegen test for @llvm.x86.sse2.psrl.w on two <8 x i16> operands.
; The autogenerated checks expect a single psrlw under the SSE prefix and a
; single vpsrlw under the KNL prefix, each followed directly by retl.
952define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
953; SSE-LABEL: test_x86_sse2_psrl_w:
954; SSE:       ## BB#0:
955; SSE-NEXT:    psrlw %xmm1, %xmm0
956; SSE-NEXT:    retl
957;
958; KNL-LABEL: test_x86_sse2_psrl_w:
959; KNL:       ## BB#0:
960; KNL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
961; KNL-NEXT:    retl
962  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
963  ret <8 x i16> %res
964}
965declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
966
967
; Codegen test for @llvm.x86.sse2.psrli.d with a <4 x i32> operand and the
; constant i32 7 as second argument. The autogenerated checks expect the
; constant to appear as the immediate $7 of psrld (SSE prefix) or vpsrld
; (KNL prefix).
968define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
969; SSE-LABEL: test_x86_sse2_psrli_d:
970; SSE:       ## BB#0:
971; SSE-NEXT:    psrld $7, %xmm0
972; SSE-NEXT:    retl
973;
974; KNL-LABEL: test_x86_sse2_psrli_d:
975; KNL:       ## BB#0:
976; KNL-NEXT:    vpsrld $7, %xmm0, %xmm0
977; KNL-NEXT:    retl
978  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
979  ret <4 x i32> %res
980}
981declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
982
983
; Codegen test for @llvm.x86.sse2.psrli.q with a <2 x i64> operand and the
; constant i32 7 as second argument. The autogenerated checks expect the
; constant to appear as the immediate $7 of psrlq (SSE prefix) or vpsrlq
; (KNL prefix).
984define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
985; SSE-LABEL: test_x86_sse2_psrli_q:
986; SSE:       ## BB#0:
987; SSE-NEXT:    psrlq $7, %xmm0
988; SSE-NEXT:    retl
989;
990; KNL-LABEL: test_x86_sse2_psrli_q:
991; KNL:       ## BB#0:
992; KNL-NEXT:    vpsrlq $7, %xmm0, %xmm0
993; KNL-NEXT:    retl
994  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
995  ret <2 x i64> %res
996}
997declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
998
999
; Codegen test for @llvm.x86.sse2.psrli.w with an <8 x i16> operand and the
; constant i32 7 as second argument. The autogenerated checks expect the
; constant to appear as the immediate $7 of psrlw (SSE prefix) or vpsrlw
; (KNL prefix).
1000define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
1001; SSE-LABEL: test_x86_sse2_psrli_w:
1002; SSE:       ## BB#0:
1003; SSE-NEXT:    psrlw $7, %xmm0
1004; SSE-NEXT:    retl
1005;
1006; KNL-LABEL: test_x86_sse2_psrli_w:
1007; KNL:       ## BB#0:
1008; KNL-NEXT:    vpsrlw $7, %xmm0, %xmm0
1009; KNL-NEXT:    retl
1010  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1011  ret <8 x i16> %res
1012}
1013declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
1014
1015
; Codegen test for @llvm.x86.sse2.psubs.b on two <16 x i8> operands.
; The autogenerated checks expect a single psubsb under the SSE prefix and a
; single vpsubsb under the KNL prefix, each followed directly by retl.
1016define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
1017; SSE-LABEL: test_x86_sse2_psubs_b:
1018; SSE:       ## BB#0:
1019; SSE-NEXT:    psubsb %xmm1, %xmm0
1020; SSE-NEXT:    retl
1021;
1022; KNL-LABEL: test_x86_sse2_psubs_b:
1023; KNL:       ## BB#0:
1024; KNL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
1025; KNL-NEXT:    retl
1026  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1027  ret <16 x i8> %res
1028}
1029declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
1030
1031
; Codegen test for @llvm.x86.sse2.psubs.w on two <8 x i16> operands.
; The autogenerated checks expect a single psubsw under the SSE prefix and a
; single vpsubsw under the KNL prefix, each followed directly by retl.
1032define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
1033; SSE-LABEL: test_x86_sse2_psubs_w:
1034; SSE:       ## BB#0:
1035; SSE-NEXT:    psubsw %xmm1, %xmm0
1036; SSE-NEXT:    retl
1037;
1038; KNL-LABEL: test_x86_sse2_psubs_w:
1039; KNL:       ## BB#0:
1040; KNL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
1041; KNL-NEXT:    retl
1042  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1043  ret <8 x i16> %res
1044}
1045declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
1046
1047
; Codegen test for @llvm.x86.sse2.psubus.b on two <16 x i8> operands.
; The autogenerated checks expect a single psubusb under the SSE prefix and a
; single vpsubusb under the KNL prefix, each followed directly by retl.
1048define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
1049; SSE-LABEL: test_x86_sse2_psubus_b:
1050; SSE:       ## BB#0:
1051; SSE-NEXT:    psubusb %xmm1, %xmm0
1052; SSE-NEXT:    retl
1053;
1054; KNL-LABEL: test_x86_sse2_psubus_b:
1055; KNL:       ## BB#0:
1056; KNL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
1057; KNL-NEXT:    retl
1058  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
1059  ret <16 x i8> %res
1060}
1061declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
1062
1063
; Codegen test for @llvm.x86.sse2.psubus.w on two <8 x i16> operands.
; The autogenerated checks expect a single psubusw under the SSE prefix and a
; single vpsubusw under the KNL prefix, each followed directly by retl.
1064define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
1065; SSE-LABEL: test_x86_sse2_psubus_w:
1066; SSE:       ## BB#0:
1067; SSE-NEXT:    psubusw %xmm1, %xmm0
1068; SSE-NEXT:    retl
1069;
1070; KNL-LABEL: test_x86_sse2_psubus_w:
1071; KNL:       ## BB#0:
1072; KNL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
1073; KNL-NEXT:    retl
1074  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1075  ret <8 x i16> %res
1076}
1077declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
1078
1079
; Codegen test for @llvm.x86.sse2.sqrt.pd on a single <2 x double> operand.
; The autogenerated checks expect sqrtpd under the SSE prefix and the
; two-operand vsqrtpd under the KNL prefix, both operating on %xmm0 in place.
1080define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
1081; SSE-LABEL: test_x86_sse2_sqrt_pd:
1082; SSE:       ## BB#0:
1083; SSE-NEXT:    sqrtpd %xmm0, %xmm0
1084; SSE-NEXT:    retl
1085;
1086; KNL-LABEL: test_x86_sse2_sqrt_pd:
1087; KNL:       ## BB#0:
1088; KNL-NEXT:    vsqrtpd %xmm0, %xmm0
1089; KNL-NEXT:    retl
1090  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
1091  ret <2 x double> %res
1092}
1093declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
1094
1095
; Codegen test for @llvm.x86.sse2.sqrt.sd on a single <2 x double> operand.
; The autogenerated checks expect sqrtsd under the SSE prefix; under the KNL
; prefix they expect the three-operand vsqrtsd with %xmm0 in every operand
; position.
1096define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
1097; SSE-LABEL: test_x86_sse2_sqrt_sd:
1098; SSE:       ## BB#0:
1099; SSE-NEXT:    sqrtsd %xmm0, %xmm0
1100; SSE-NEXT:    retl
1101;
1102; KNL-LABEL: test_x86_sse2_sqrt_sd:
1103; KNL:       ## BB#0:
1104; KNL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
1105; KNL-NEXT:    retl
1106  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
1107  ret <2 x double> %res
1108}
1109declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
1110
1111
; Codegen test for @llvm.x86.sse2.sub.sd on two <2 x double> operands.
; The autogenerated checks expect a single subsd under the SSE prefix and a
; single vsubsd under the KNL prefix, each followed directly by retl.
1112define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
1113; SSE-LABEL: test_x86_sse2_sub_sd:
1114; SSE:       ## BB#0:
1115; SSE-NEXT:    subsd %xmm1, %xmm0
1116; SSE-NEXT:    retl
1117;
1118; KNL-LABEL: test_x86_sse2_sub_sd:
1119; KNL:       ## BB#0:
1120; KNL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
1121; KNL-NEXT:    retl
1122  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
1123  ret <2 x double> %res
1124}
1125declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
1126
1127
; Codegen test for @llvm.x86.sse2.ucomieq.sd, which takes two <2 x double>
; operands and returns an i32. The autogenerated checks expect ucomisd (SSE
; prefix) or vucomisd (KNL prefix), then setnp and sete combined with andb
; and zero-extended into %eax.
; NOTE(review): the parity test presumably excludes the unordered (NaN)
; compare result - confirm against the ISA reference.
1128define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
1129; SSE-LABEL: test_x86_sse2_ucomieq_sd:
1130; SSE:       ## BB#0:
1131; SSE-NEXT:    ucomisd %xmm1, %xmm0
1132; SSE-NEXT:    setnp %al
1133; SSE-NEXT:    sete %cl
1134; SSE-NEXT:    andb %al, %cl
1135; SSE-NEXT:    movzbl %cl, %eax
1136; SSE-NEXT:    retl
1137;
1138; KNL-LABEL: test_x86_sse2_ucomieq_sd:
1139; KNL:       ## BB#0:
1140; KNL-NEXT:    vucomisd %xmm1, %xmm0
1141; KNL-NEXT:    setnp %al
1142; KNL-NEXT:    sete %cl
1143; KNL-NEXT:    andb %al, %cl
1144; KNL-NEXT:    movzbl %cl, %eax
1145; KNL-NEXT:    retl
1146  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1147  ret i32 %res
1148}
1149declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
1150
1151
; Codegen test for @llvm.x86.sse2.ucomige.sd, which takes two <2 x double>
; operands and returns an i32. The autogenerated checks expect %eax to be
; zeroed with xorl before the compare, then ucomisd (SSE prefix) or vucomisd
; (KNL prefix) on %xmm1, %xmm0, then setae into %al.
1152define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
1153; SSE-LABEL: test_x86_sse2_ucomige_sd:
1154; SSE:       ## BB#0:
1155; SSE-NEXT:    xorl %eax, %eax
1156; SSE-NEXT:    ucomisd %xmm1, %xmm0
1157; SSE-NEXT:    setae %al
1158; SSE-NEXT:    retl
1159;
1160; KNL-LABEL: test_x86_sse2_ucomige_sd:
1161; KNL:       ## BB#0:
1162; KNL-NEXT:    xorl %eax, %eax
1163; KNL-NEXT:    vucomisd %xmm1, %xmm0
1164; KNL-NEXT:    setae %al
1165; KNL-NEXT:    retl
1166  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1167  ret i32 %res
1168}
1169declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
1170
1171
; Codegen test for @llvm.x86.sse2.ucomigt.sd, which takes two <2 x double>
; operands and returns an i32. The autogenerated checks expect %eax to be
; zeroed with xorl before the compare, then ucomisd (SSE prefix) or vucomisd
; (KNL prefix) on %xmm1, %xmm0, then seta into %al.
1172define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
1173; SSE-LABEL: test_x86_sse2_ucomigt_sd:
1174; SSE:       ## BB#0:
1175; SSE-NEXT:    xorl %eax, %eax
1176; SSE-NEXT:    ucomisd %xmm1, %xmm0
1177; SSE-NEXT:    seta %al
1178; SSE-NEXT:    retl
1179;
1180; KNL-LABEL: test_x86_sse2_ucomigt_sd:
1181; KNL:       ## BB#0:
1182; KNL-NEXT:    xorl %eax, %eax
1183; KNL-NEXT:    vucomisd %xmm1, %xmm0
1184; KNL-NEXT:    seta %al
1185; KNL-NEXT:    retl
1186  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1187  ret i32 %res
1188}
1189declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
1190
1191
; Codegen test for @llvm.x86.sse2.ucomile.sd, which takes two <2 x double>
; operands and returns an i32. The autogenerated checks expect the compare
; with its register operands in swapped order (%xmm0, %xmm1) relative to the
; ucomige test, followed by setae into %al, which is zeroed by xorl first.
1192define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
1193; SSE-LABEL: test_x86_sse2_ucomile_sd:
1194; SSE:       ## BB#0:
1195; SSE-NEXT:    xorl %eax, %eax
1196; SSE-NEXT:    ucomisd %xmm0, %xmm1
1197; SSE-NEXT:    setae %al
1198; SSE-NEXT:    retl
1199;
1200; KNL-LABEL: test_x86_sse2_ucomile_sd:
1201; KNL:       ## BB#0:
1202; KNL-NEXT:    xorl %eax, %eax
1203; KNL-NEXT:    vucomisd %xmm0, %xmm1
1204; KNL-NEXT:    setae %al
1205; KNL-NEXT:    retl
1206  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1207  ret i32 %res
1208}
1209declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
1210
1211
; Codegen test for @llvm.x86.sse2.ucomilt.sd, which takes two <2 x double>
; operands and returns an i32. The autogenerated checks expect the compare
; with its register operands in swapped order (%xmm0, %xmm1) relative to the
; ucomigt test, followed by seta into %al, which is zeroed by xorl first.
1212define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
1213; SSE-LABEL: test_x86_sse2_ucomilt_sd:
1214; SSE:       ## BB#0:
1215; SSE-NEXT:    xorl %eax, %eax
1216; SSE-NEXT:    ucomisd %xmm0, %xmm1
1217; SSE-NEXT:    seta %al
1218; SSE-NEXT:    retl
1219;
1220; KNL-LABEL: test_x86_sse2_ucomilt_sd:
1221; KNL:       ## BB#0:
1222; KNL-NEXT:    xorl %eax, %eax
1223; KNL-NEXT:    vucomisd %xmm0, %xmm1
1224; KNL-NEXT:    seta %al
1225; KNL-NEXT:    retl
1226  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1227  ret i32 %res
1228}
1229declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
1230
1231
; Codegen test for @llvm.x86.sse2.ucomineq.sd, which takes two <2 x double>
; operands and returns an i32. The autogenerated checks expect ucomisd (SSE
; prefix) or vucomisd (KNL prefix), then setp and setne combined with orb
; and zero-extended into %eax.
; NOTE(review): the parity test presumably makes the unordered (NaN) compare
; result count as "not equal" - confirm against the ISA reference.
1232define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
1233; SSE-LABEL: test_x86_sse2_ucomineq_sd:
1234; SSE:       ## BB#0:
1235; SSE-NEXT:    ucomisd %xmm1, %xmm0
1236; SSE-NEXT:    setp %al
1237; SSE-NEXT:    setne %cl
1238; SSE-NEXT:    orb %al, %cl
1239; SSE-NEXT:    movzbl %cl, %eax
1240; SSE-NEXT:    retl
1241;
1242; KNL-LABEL: test_x86_sse2_ucomineq_sd:
1243; KNL:       ## BB#0:
1244; KNL-NEXT:    vucomisd %xmm1, %xmm0
1245; KNL-NEXT:    setp %al
1246; KNL-NEXT:    setne %cl
1247; KNL-NEXT:    orb %al, %cl
1248; KNL-NEXT:    movzbl %cl, %eax
1249; KNL-NEXT:    retl
1250  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1251  ret i32 %res
1252}
1253declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
1254
; Codegen test for @llvm.x86.sse2.pause, which takes no arguments and returns
; void. The autogenerated checks expect the same unprefixed pause mnemonic
; under both the SSE and the KNL prefix, followed directly by retl. Unlike
; the other intrinsics in this part of the file, the declaration is marked
; nounwind only, not readnone.
1255define void @test_x86_sse2_pause() {
1256; SSE-LABEL: test_x86_sse2_pause:
1257; SSE:       ## BB#0:
1258; SSE-NEXT:    pause
1259; SSE-NEXT:    retl
1260;
1261; KNL-LABEL: test_x86_sse2_pause:
1262; KNL:       ## BB#0:
1263; KNL-NEXT:    pause
1264; KNL-NEXT:    retl
1265  tail call void @llvm.x86.sse2.pause()
1266  ret void
1267}
1268declare void @llvm.x86.sse2.pause() nounwind
1269