1; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
4
; Exercises @llvm.x86.sse.add.ss: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single addss; the
; -mcpu=knl run expects the three-operand vaddss.
5define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
6; SSE-LABEL: test_x86_sse_add_ss:
7; SSE:       ## BB#0:
8; SSE-NEXT:    addss %xmm1, %xmm0
9; SSE-NEXT:    retl
10;
11; KNL-LABEL: test_x86_sse_add_ss:
12; KNL:       ## BB#0:
13; KNL-NEXT:    vaddss %xmm1, %xmm0, %xmm0
14; KNL-NEXT:    retl
15  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
16  ret <4 x float> %res
17}
18declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
19
20
; Exercises @llvm.x86.sse.cmp.ps with the constant i8 operand 7. The expected
; assembly prints that operand as part of the mnemonic: cmpordps on the
; -mattr=-avx,+sse run and vcmpordps on the -mcpu=knl run.
21define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
22; SSE-LABEL: test_x86_sse_cmp_ps:
23; SSE:       ## BB#0:
24; SSE-NEXT:    cmpordps %xmm1, %xmm0
25; SSE-NEXT:    retl
26;
27; KNL-LABEL: test_x86_sse_cmp_ps:
28; KNL:       ## BB#0:
29; KNL-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
30; KNL-NEXT:    retl
31  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
32  ret <4 x float> %res
33}
34declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
35
36
; Exercises @llvm.x86.sse.cmp.ss with the constant i8 operand 7. The expected
; assembly folds that operand into the mnemonic: cmpordss on the
; -mattr=-avx,+sse run and vcmpordss on the -mcpu=knl run.
37define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
38; SSE-LABEL: test_x86_sse_cmp_ss:
39; SSE:       ## BB#0:
40; SSE-NEXT:    cmpordss %xmm1, %xmm0
41; SSE-NEXT:    retl
42;
43; KNL-LABEL: test_x86_sse_cmp_ss:
44; KNL:       ## BB#0:
45; KNL-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
46; KNL-NEXT:    retl
47  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
48  ret <4 x float> %res
49}
50declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
51
52
; Exercises @llvm.x86.sse.comieq.ss and returns its i32 result. The expected
; sequence is (v)comiss followed by setnp and sete whose byte results are
; combined with andb and zero-extended into %eax.
53define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
54; SSE-LABEL: test_x86_sse_comieq_ss:
55; SSE:       ## BB#0:
56; SSE-NEXT:    comiss %xmm1, %xmm0
57; SSE-NEXT:    setnp %al
58; SSE-NEXT:    sete %cl
59; SSE-NEXT:    andb %al, %cl
60; SSE-NEXT:    movzbl %cl, %eax
61; SSE-NEXT:    retl
62;
63; KNL-LABEL: test_x86_sse_comieq_ss:
64; KNL:       ## BB#0:
65; KNL-NEXT:    vcomiss %xmm1, %xmm0
66; KNL-NEXT:    setnp %al
67; KNL-NEXT:    sete %cl
68; KNL-NEXT:    andb %al, %cl
69; KNL-NEXT:    movzbl %cl, %eax
70; KNL-NEXT:    retl
71  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
72  ret i32 %res
73}
74declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
75
76
; Exercises @llvm.x86.sse.comige.ss and returns its i32 result. The expected
; sequence zeroes %eax first, then issues (v)comiss %xmm1, %xmm0 and writes
; the outcome into %al with setae.
77define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
78; SSE-LABEL: test_x86_sse_comige_ss:
79; SSE:       ## BB#0:
80; SSE-NEXT:    xorl %eax, %eax
81; SSE-NEXT:    comiss %xmm1, %xmm0
82; SSE-NEXT:    setae %al
83; SSE-NEXT:    retl
84;
85; KNL-LABEL: test_x86_sse_comige_ss:
86; KNL:       ## BB#0:
87; KNL-NEXT:    xorl %eax, %eax
88; KNL-NEXT:    vcomiss %xmm1, %xmm0
89; KNL-NEXT:    setae %al
90; KNL-NEXT:    retl
91  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
92  ret i32 %res
93}
94declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
95
96
; Exercises @llvm.x86.sse.comigt.ss and returns its i32 result. The expected
; sequence zeroes %eax first, then issues (v)comiss %xmm1, %xmm0 and writes
; the outcome into %al with seta.
97define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
98; SSE-LABEL: test_x86_sse_comigt_ss:
99; SSE:       ## BB#0:
100; SSE-NEXT:    xorl %eax, %eax
101; SSE-NEXT:    comiss %xmm1, %xmm0
102; SSE-NEXT:    seta %al
103; SSE-NEXT:    retl
104;
105; KNL-LABEL: test_x86_sse_comigt_ss:
106; KNL:       ## BB#0:
107; KNL-NEXT:    xorl %eax, %eax
108; KNL-NEXT:    vcomiss %xmm1, %xmm0
109; KNL-NEXT:    seta %al
110; KNL-NEXT:    retl
111  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
112  ret i32 %res
113}
114declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
115
116
; Exercises @llvm.x86.sse.comile.ss and returns its i32 result. Note the
; expected (v)comiss takes its operands in the order %xmm0, %xmm1 (the reverse
; of the comige test in this file) and is paired with setae.
117define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
118; SSE-LABEL: test_x86_sse_comile_ss:
119; SSE:       ## BB#0:
120; SSE-NEXT:    xorl %eax, %eax
121; SSE-NEXT:    comiss %xmm0, %xmm1
122; SSE-NEXT:    setae %al
123; SSE-NEXT:    retl
124;
125; KNL-LABEL: test_x86_sse_comile_ss:
126; KNL:       ## BB#0:
127; KNL-NEXT:    xorl %eax, %eax
128; KNL-NEXT:    vcomiss %xmm0, %xmm1
129; KNL-NEXT:    setae %al
130; KNL-NEXT:    retl
131  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
132  ret i32 %res
133}
134declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
135
136
; Exercises @llvm.x86.sse.comilt.ss and returns its i32 result. Note the
; expected (v)comiss takes its operands in the order %xmm0, %xmm1 (the reverse
; of the comigt test in this file) and is paired with seta.
137define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
138; SSE-LABEL: test_x86_sse_comilt_ss:
139; SSE:       ## BB#0:
140; SSE-NEXT:    xorl %eax, %eax
141; SSE-NEXT:    comiss %xmm0, %xmm1
142; SSE-NEXT:    seta %al
143; SSE-NEXT:    retl
144;
145; KNL-LABEL: test_x86_sse_comilt_ss:
146; KNL:       ## BB#0:
147; KNL-NEXT:    xorl %eax, %eax
148; KNL-NEXT:    vcomiss %xmm0, %xmm1
149; KNL-NEXT:    seta %al
150; KNL-NEXT:    retl
151  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
152  ret i32 %res
153}
154declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
155
156
; Exercises @llvm.x86.sse.comineq.ss and returns its i32 result. The expected
; sequence is (v)comiss followed by setp and setne whose byte results are
; combined with orb and zero-extended into %eax.
157define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
158; SSE-LABEL: test_x86_sse_comineq_ss:
159; SSE:       ## BB#0:
160; SSE-NEXT:    comiss %xmm1, %xmm0
161; SSE-NEXT:    setp %al
162; SSE-NEXT:    setne %cl
163; SSE-NEXT:    orb %al, %cl
164; SSE-NEXT:    movzbl %cl, %eax
165; SSE-NEXT:    retl
166;
167; KNL-LABEL: test_x86_sse_comineq_ss:
168; KNL:       ## BB#0:
169; KNL-NEXT:    vcomiss %xmm1, %xmm0
170; KNL-NEXT:    setp %al
171; KNL-NEXT:    setne %cl
172; KNL-NEXT:    orb %al, %cl
173; KNL-NEXT:    movzbl %cl, %eax
174; KNL-NEXT:    retl
175  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
176  ret i32 %res
177}
178declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
179
180
; Exercises @llvm.x86.sse.cvtsi2ss with %a0 and the constant i32 7. The
; expected code materializes the constant in %eax with movl $7 and then
; converts from that register with (v)cvtsi2ssl.
181define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
182; SSE-LABEL: test_x86_sse_cvtsi2ss:
183; SSE:       ## BB#0:
184; SSE-NEXT:    movl $7, %eax
185; SSE-NEXT:    cvtsi2ssl %eax, %xmm0
186; SSE-NEXT:    retl
187;
188; KNL-LABEL: test_x86_sse_cvtsi2ss:
189; KNL:       ## BB#0:
190; KNL-NEXT:    movl $7, %eax
191; KNL-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
192; KNL-NEXT:    retl
193  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
194  ret <4 x float> %res
195}
196declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
197
198
; Exercises @llvm.x86.sse.cvtss2si on %a0 and returns the i32 result. The
; expected lowering is a single (v)cvtss2si from %xmm0 into %eax.
199define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
200; SSE-LABEL: test_x86_sse_cvtss2si:
201; SSE:       ## BB#0:
202; SSE-NEXT:    cvtss2si %xmm0, %eax
203; SSE-NEXT:    retl
204;
205; KNL-LABEL: test_x86_sse_cvtss2si:
206; KNL:       ## BB#0:
207; KNL-NEXT:    vcvtss2si %xmm0, %eax
208; KNL-NEXT:    retl
209  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
210  ret i32 %res
211}
212declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
213
214
; Exercises @llvm.x86.sse.cvttss2si on %a0 and returns the i32 result. The
; expected lowering is a single (v)cvttss2si from %xmm0 into %eax.
215define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
216; SSE-LABEL: test_x86_sse_cvttss2si:
217; SSE:       ## BB#0:
218; SSE-NEXT:    cvttss2si %xmm0, %eax
219; SSE-NEXT:    retl
220;
221; KNL-LABEL: test_x86_sse_cvttss2si:
222; KNL:       ## BB#0:
223; KNL-NEXT:    vcvttss2si %xmm0, %eax
224; KNL-NEXT:    retl
225  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
226  ret i32 %res
227}
228declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
229
230
; Exercises @llvm.x86.sse.div.ss: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single divss; the
; -mcpu=knl run expects the three-operand vdivss.
231define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
232; SSE-LABEL: test_x86_sse_div_ss:
233; SSE:       ## BB#0:
234; SSE-NEXT:    divss %xmm1, %xmm0
235; SSE-NEXT:    retl
236;
237; KNL-LABEL: test_x86_sse_div_ss:
238; KNL:       ## BB#0:
239; KNL-NEXT:    vdivss %xmm1, %xmm0, %xmm0
240; KNL-NEXT:    retl
241  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
242  ret <4 x float> %res
243}
244declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
245
246
; Exercises @llvm.x86.sse.ldmxcsr with the pointer argument %a0. The expected
; code first loads the argument from the stack into %eax (the offset is
; matched by regex, not pinned) and then issues (v)ldmxcsr on (%eax).
247define void @test_x86_sse_ldmxcsr(i8* %a0) {
248; SSE-LABEL: test_x86_sse_ldmxcsr:
249; SSE:       ## BB#0:
250; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
251; SSE-NEXT:    ldmxcsr (%eax)
252; SSE-NEXT:    retl
253;
254; KNL-LABEL: test_x86_sse_ldmxcsr:
255; KNL:       ## BB#0:
256; KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
257; KNL-NEXT:    vldmxcsr (%eax)
258; KNL-NEXT:    retl
259  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
260  ret void
261}
262declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
263
264
265
; Exercises @llvm.x86.sse.max.ps: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single maxps; the
; -mcpu=knl run expects the three-operand vmaxps.
266define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
267; SSE-LABEL: test_x86_sse_max_ps:
268; SSE:       ## BB#0:
269; SSE-NEXT:    maxps %xmm1, %xmm0
270; SSE-NEXT:    retl
271;
272; KNL-LABEL: test_x86_sse_max_ps:
273; KNL:       ## BB#0:
274; KNL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
275; KNL-NEXT:    retl
276  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
277  ret <4 x float> %res
278}
279declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
280
281
; Exercises @llvm.x86.sse.max.ss: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single maxss; the
; -mcpu=knl run expects the three-operand vmaxss.
282define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
283; SSE-LABEL: test_x86_sse_max_ss:
284; SSE:       ## BB#0:
285; SSE-NEXT:    maxss %xmm1, %xmm0
286; SSE-NEXT:    retl
287;
288; KNL-LABEL: test_x86_sse_max_ss:
289; KNL:       ## BB#0:
290; KNL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
291; KNL-NEXT:    retl
292  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
293  ret <4 x float> %res
294}
295declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
296
297
; Exercises @llvm.x86.sse.min.ps: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single minps; the
; -mcpu=knl run expects the three-operand vminps.
298define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
299; SSE-LABEL: test_x86_sse_min_ps:
300; SSE:       ## BB#0:
301; SSE-NEXT:    minps %xmm1, %xmm0
302; SSE-NEXT:    retl
303;
304; KNL-LABEL: test_x86_sse_min_ps:
305; KNL:       ## BB#0:
306; KNL-NEXT:    vminps %xmm1, %xmm0, %xmm0
307; KNL-NEXT:    retl
308  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
309  ret <4 x float> %res
310}
311declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
312
313
; Exercises @llvm.x86.sse.min.ss: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single minss; the
; -mcpu=knl run expects the three-operand vminss.
314define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
315; SSE-LABEL: test_x86_sse_min_ss:
316; SSE:       ## BB#0:
317; SSE-NEXT:    minss %xmm1, %xmm0
318; SSE-NEXT:    retl
319;
320; KNL-LABEL: test_x86_sse_min_ss:
321; KNL:       ## BB#0:
322; KNL-NEXT:    vminss %xmm1, %xmm0, %xmm0
323; KNL-NEXT:    retl
324  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
325  ret <4 x float> %res
326}
327declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
328
329
; Exercises @llvm.x86.sse.movmsk.ps on %a0 and returns the i32 result. The
; expected lowering is a single (v)movmskps from %xmm0 into %eax.
330define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
331; SSE-LABEL: test_x86_sse_movmsk_ps:
332; SSE:       ## BB#0:
333; SSE-NEXT:    movmskps %xmm0, %eax
334; SSE-NEXT:    retl
335;
336; KNL-LABEL: test_x86_sse_movmsk_ps:
337; KNL:       ## BB#0:
338; KNL-NEXT:    vmovmskps %xmm0, %eax
339; KNL-NEXT:    retl
340  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
341  ret i32 %res
342}
343declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
344
345
346
; Exercises @llvm.x86.sse.mul.ss: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single mulss; the
; -mcpu=knl run expects the three-operand vmulss.
347define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
348; SSE-LABEL: test_x86_sse_mul_ss:
349; SSE:       ## BB#0:
350; SSE-NEXT:    mulss %xmm1, %xmm0
351; SSE-NEXT:    retl
352;
353; KNL-LABEL: test_x86_sse_mul_ss:
354; KNL:       ## BB#0:
355; KNL-NEXT:    vmulss %xmm1, %xmm0, %xmm0
356; KNL-NEXT:    retl
357  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
358  ret <4 x float> %res
359}
360declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
361
362
; Exercises the unary intrinsic @llvm.x86.sse.rcp.ps on %a0. Both runs expect
; a two-operand instruction operating in place on %xmm0: rcpps for the
; -mattr=-avx,+sse run and vrcpps for the -mcpu=knl run.
363define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
364; SSE-LABEL: test_x86_sse_rcp_ps:
365; SSE:       ## BB#0:
366; SSE-NEXT:    rcpps %xmm0, %xmm0
367; SSE-NEXT:    retl
368;
369; KNL-LABEL: test_x86_sse_rcp_ps:
370; KNL:       ## BB#0:
371; KNL-NEXT:    vrcpps %xmm0, %xmm0
372; KNL-NEXT:    retl
373  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
374  ret <4 x float> %res
375}
376declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
377
378
; Exercises the unary intrinsic @llvm.x86.sse.rcp.ss on %a0. The
; -mattr=-avx,+sse run expects rcpss %xmm0, %xmm0; the -mcpu=knl run expects
; the three-operand vrcpss with %xmm0 used for every operand.
379define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
380; SSE-LABEL: test_x86_sse_rcp_ss:
381; SSE:       ## BB#0:
382; SSE-NEXT:    rcpss %xmm0, %xmm0
383; SSE-NEXT:    retl
384;
385; KNL-LABEL: test_x86_sse_rcp_ss:
386; KNL:       ## BB#0:
387; KNL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
388; KNL-NEXT:    retl
389  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
390  ret <4 x float> %res
391}
392declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
393
394
; Exercises the unary intrinsic @llvm.x86.sse.rsqrt.ps on %a0. Both runs
; expect a two-operand instruction operating in place on %xmm0: rsqrtps for
; the -mattr=-avx,+sse run and vrsqrtps for the -mcpu=knl run.
395define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
396; SSE-LABEL: test_x86_sse_rsqrt_ps:
397; SSE:       ## BB#0:
398; SSE-NEXT:    rsqrtps %xmm0, %xmm0
399; SSE-NEXT:    retl
400;
401; KNL-LABEL: test_x86_sse_rsqrt_ps:
402; KNL:       ## BB#0:
403; KNL-NEXT:    vrsqrtps %xmm0, %xmm0
404; KNL-NEXT:    retl
405  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
406  ret <4 x float> %res
407}
408declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
409
410
; Exercises the unary intrinsic @llvm.x86.sse.rsqrt.ss on %a0. The
; -mattr=-avx,+sse run expects rsqrtss %xmm0, %xmm0; the -mcpu=knl run expects
; the three-operand vrsqrtss with %xmm0 used for every operand.
411define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
412; SSE-LABEL: test_x86_sse_rsqrt_ss:
413; SSE:       ## BB#0:
414; SSE-NEXT:    rsqrtss %xmm0, %xmm0
415; SSE-NEXT:    retl
416;
417; KNL-LABEL: test_x86_sse_rsqrt_ss:
418; KNL:       ## BB#0:
419; KNL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
420; KNL-NEXT:    retl
421  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
422  ret <4 x float> %res
423}
424declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
425
426
; Exercises the unary intrinsic @llvm.x86.sse.sqrt.ps on %a0. Both runs expect
; a two-operand instruction operating in place on %xmm0: sqrtps for the
; -mattr=-avx,+sse run and vsqrtps for the -mcpu=knl run.
427define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
428; SSE-LABEL: test_x86_sse_sqrt_ps:
429; SSE:       ## BB#0:
430; SSE-NEXT:    sqrtps %xmm0, %xmm0
431; SSE-NEXT:    retl
432;
433; KNL-LABEL: test_x86_sse_sqrt_ps:
434; KNL:       ## BB#0:
435; KNL-NEXT:    vsqrtps %xmm0, %xmm0
436; KNL-NEXT:    retl
437  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
438  ret <4 x float> %res
439}
440declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
441
442
; Exercises the unary intrinsic @llvm.x86.sse.sqrt.ss on %a0. The
; -mattr=-avx,+sse run expects sqrtss %xmm0, %xmm0; the -mcpu=knl run expects
; the three-operand vsqrtss with %xmm0 used for every operand.
443define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
444; SSE-LABEL: test_x86_sse_sqrt_ss:
445; SSE:       ## BB#0:
446; SSE-NEXT:    sqrtss %xmm0, %xmm0
447; SSE-NEXT:    retl
448;
449; KNL-LABEL: test_x86_sse_sqrt_ss:
450; KNL:       ## BB#0:
451; KNL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
452; KNL-NEXT:    retl
453  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
454  ret <4 x float> %res
455}
456declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
457
458
; Exercises @llvm.x86.sse.stmxcsr with the pointer argument %a0. The expected
; code first loads the argument from the stack into %eax (the offset is
; matched by regex, not pinned) and then issues (v)stmxcsr on (%eax).
459define void @test_x86_sse_stmxcsr(i8* %a0) {
460; SSE-LABEL: test_x86_sse_stmxcsr:
461; SSE:       ## BB#0:
462; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
463; SSE-NEXT:    stmxcsr (%eax)
464; SSE-NEXT:    retl
465;
466; KNL-LABEL: test_x86_sse_stmxcsr:
467; KNL:       ## BB#0:
468; KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
469; KNL-NEXT:    vstmxcsr (%eax)
470; KNL-NEXT:    retl
471  call void @llvm.x86.sse.stmxcsr(i8* %a0)
472  ret void
473}
474declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
475
476
; Exercises @llvm.x86.sse.sub.ss: forwards %a0 and %a1 to the intrinsic and
; returns its result. The -mattr=-avx,+sse run expects a single subss; the
; -mcpu=knl run expects the three-operand vsubss.
477define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
478; SSE-LABEL: test_x86_sse_sub_ss:
479; SSE:       ## BB#0:
480; SSE-NEXT:    subss %xmm1, %xmm0
481; SSE-NEXT:    retl
482;
483; KNL-LABEL: test_x86_sse_sub_ss:
484; KNL:       ## BB#0:
485; KNL-NEXT:    vsubss %xmm1, %xmm0, %xmm0
486; KNL-NEXT:    retl
487  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
488  ret <4 x float> %res
489}
490declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
491
492
; Exercises @llvm.x86.sse.ucomieq.ss and returns its i32 result. The expected
; sequence is (v)ucomiss followed by setnp and sete whose byte results are
; combined with andb and zero-extended into %eax.
493define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
494; SSE-LABEL: test_x86_sse_ucomieq_ss:
495; SSE:       ## BB#0:
496; SSE-NEXT:    ucomiss %xmm1, %xmm0
497; SSE-NEXT:    setnp %al
498; SSE-NEXT:    sete %cl
499; SSE-NEXT:    andb %al, %cl
500; SSE-NEXT:    movzbl %cl, %eax
501; SSE-NEXT:    retl
502;
503; KNL-LABEL: test_x86_sse_ucomieq_ss:
504; KNL:       ## BB#0:
505; KNL-NEXT:    vucomiss %xmm1, %xmm0
506; KNL-NEXT:    setnp %al
507; KNL-NEXT:    sete %cl
508; KNL-NEXT:    andb %al, %cl
509; KNL-NEXT:    movzbl %cl, %eax
510; KNL-NEXT:    retl
511  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
512  ret i32 %res
513}
514declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
515
516
; Exercises @llvm.x86.sse.ucomige.ss and returns its i32 result. The expected
; sequence zeroes %eax first, then issues (v)ucomiss %xmm1, %xmm0 and writes
; the outcome into %al with setae.
517define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
518; SSE-LABEL: test_x86_sse_ucomige_ss:
519; SSE:       ## BB#0:
520; SSE-NEXT:    xorl %eax, %eax
521; SSE-NEXT:    ucomiss %xmm1, %xmm0
522; SSE-NEXT:    setae %al
523; SSE-NEXT:    retl
524;
525; KNL-LABEL: test_x86_sse_ucomige_ss:
526; KNL:       ## BB#0:
527; KNL-NEXT:    xorl %eax, %eax
528; KNL-NEXT:    vucomiss %xmm1, %xmm0
529; KNL-NEXT:    setae %al
530; KNL-NEXT:    retl
531  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
532  ret i32 %res
533}
534declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
535
536
; Exercises @llvm.x86.sse.ucomigt.ss and returns its i32 result. The expected
; sequence zeroes %eax first, then issues (v)ucomiss %xmm1, %xmm0 and writes
; the outcome into %al with seta.
537define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
538; SSE-LABEL: test_x86_sse_ucomigt_ss:
539; SSE:       ## BB#0:
540; SSE-NEXT:    xorl %eax, %eax
541; SSE-NEXT:    ucomiss %xmm1, %xmm0
542; SSE-NEXT:    seta %al
543; SSE-NEXT:    retl
544;
545; KNL-LABEL: test_x86_sse_ucomigt_ss:
546; KNL:       ## BB#0:
547; KNL-NEXT:    xorl %eax, %eax
548; KNL-NEXT:    vucomiss %xmm1, %xmm0
549; KNL-NEXT:    seta %al
550; KNL-NEXT:    retl
551  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
552  ret i32 %res
553}
554declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
555
556
; Exercises @llvm.x86.sse.ucomile.ss and returns its i32 result. Note the
; expected (v)ucomiss takes its operands in the order %xmm0, %xmm1 (the
; reverse of the ucomige test in this file) and is paired with setae.
557define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
558; SSE-LABEL: test_x86_sse_ucomile_ss:
559; SSE:       ## BB#0:
560; SSE-NEXT:    xorl %eax, %eax
561; SSE-NEXT:    ucomiss %xmm0, %xmm1
562; SSE-NEXT:    setae %al
563; SSE-NEXT:    retl
564;
565; KNL-LABEL: test_x86_sse_ucomile_ss:
566; KNL:       ## BB#0:
567; KNL-NEXT:    xorl %eax, %eax
568; KNL-NEXT:    vucomiss %xmm0, %xmm1
569; KNL-NEXT:    setae %al
570; KNL-NEXT:    retl
571  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
572  ret i32 %res
573}
574declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
575
576
; Exercises @llvm.x86.sse.ucomilt.ss and returns its i32 result. Note the
; expected (v)ucomiss takes its operands in the order %xmm0, %xmm1 (the
; reverse of the ucomigt test in this file) and is paired with seta.
577define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
578; SSE-LABEL: test_x86_sse_ucomilt_ss:
579; SSE:       ## BB#0:
580; SSE-NEXT:    xorl %eax, %eax
581; SSE-NEXT:    ucomiss %xmm0, %xmm1
582; SSE-NEXT:    seta %al
583; SSE-NEXT:    retl
584;
585; KNL-LABEL: test_x86_sse_ucomilt_ss:
586; KNL:       ## BB#0:
587; KNL-NEXT:    xorl %eax, %eax
588; KNL-NEXT:    vucomiss %xmm0, %xmm1
589; KNL-NEXT:    seta %al
590; KNL-NEXT:    retl
591  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
592  ret i32 %res
593}
594declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
595
596
; Exercises @llvm.x86.sse.ucomineq.ss and returns its i32 result. The expected
; sequence is (v)ucomiss followed by setp and setne whose byte results are
; combined with orb and zero-extended into %eax.
597define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
598; SSE-LABEL: test_x86_sse_ucomineq_ss:
599; SSE:       ## BB#0:
600; SSE-NEXT:    ucomiss %xmm1, %xmm0
601; SSE-NEXT:    setp %al
602; SSE-NEXT:    setne %cl
603; SSE-NEXT:    orb %al, %cl
604; SSE-NEXT:    movzbl %cl, %eax
605; SSE-NEXT:    retl
606;
607; KNL-LABEL: test_x86_sse_ucomineq_ss:
608; KNL:       ## BB#0:
609; KNL-NEXT:    vucomiss %xmm1, %xmm0
610; KNL-NEXT:    setp %al
611; KNL-NEXT:    setne %cl
612; KNL-NEXT:    orb %al, %cl
613; KNL-NEXT:    movzbl %cl, %eax
614; KNL-NEXT:    retl
615  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
616  ret i32 %res
617}
618declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
619