1; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
2
3; This tests fcmp operations that do not map directly to NEON instructions.
4
; fcmp une ("unordered or not equal") on <2 x float>.
; Expected lowering: VCEQ immediately followed by VMVN, i.e. the bitwise
; complement of the ordered-equal lane mask.
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcunef32:
;CHECK: vceq.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %ne = fcmp une <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %ne to <2 x i32>
  ret <2 x i32> %mask
}
16
; fcmp olt ("ordered and less than") on <2 x float>.
; Expected lowering: a single VCGT. Only the mnemonic is verified here;
; presumably the operands are swapped so that a < b becomes b > a.
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcoltf32:
;CHECK: vcgt.f32
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %lt = fcmp olt <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %lt to <2 x i32>
  ret <2 x i32> %mask
}
27
; fcmp ole ("ordered and less than or equal") on <2 x float>.
; Expected lowering: a single VCGE. Only the mnemonic is verified here;
; presumably the operands are swapped so that a <= b becomes b >= a.
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcolef32:
;CHECK: vcge.f32
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %le = fcmp ole <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %le to <2 x i32>
  ret <2 x i32> %mask
}
38
; fcmp uge ("unordered or greater than or equal") on <2 x float>.
; uge is the logical negation of olt, so the expected lowering is VCGT
; immediately followed by VMVN.
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcugef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %ge = fcmp uge <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %ge to <2 x i32>
  ret <2 x i32> %mask
}
50
; fcmp ule ("unordered or less than or equal") on <2 x float>.
; ule is the logical negation of ogt, so the expected lowering is VCGT
; immediately followed by VMVN.
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vculef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %le = fcmp ule <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %le to <2 x i32>
  ret <2 x i32> %mask
}
62
; fcmp ugt ("unordered or greater than") on <2 x float>.
; ugt is the logical negation of ole, so the expected lowering is VCGE
; immediately followed by VMVN.
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcugtf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %gt = fcmp ugt <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %gt to <2 x i32>
  ret <2 x i32> %mask
}
74
; fcmp ult ("unordered or less than") on <2 x float>.
; ult is the logical negation of oge, so the expected lowering is VCGE
; immediately followed by VMVN.
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcultf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %lt = fcmp ult <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %lt to <2 x i32>
  ret <2 x i32> %mask
}
86
; fcmp ueq ("unordered or equal") on <2 x float>.
; ueq is the logical negation of one (ordered and not equal). Expected
; lowering: two VCGT results combined with VORR, then inverted with VMVN,
; all four instructions back to back.
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcueqf32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %eq = fcmp ueq <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %eq to <2 x i32>
  ret <2 x i32> %mask
}
100
; fcmp one ("ordered and not equal") on <2 x float>.
; one holds exactly when either operand is strictly greater than the other.
; Expected lowering: two consecutive VCGT results combined with VORR.
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vconef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %ne = fcmp one <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %ne to <2 x i32>
  ret <2 x i32> %mask
}
113
; fcmp uno ("unordered": at least one operand is NaN) on <2 x float>.
; uno is the logical negation of ord. Expected lowering, in this order:
; VCGE, VCGT, VORR of the two results, then VMVN to invert the mask.
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcunof32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %unord = fcmp uno <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %unord to <2 x i32>
  ret <2 x i32> %mask
}
127
; fcmp ord ("ordered": neither operand is NaN) on <2 x float>.
; Two non-NaN values always satisfy either >= or <, so the expected
; lowering, in this order, is VCGE, VCGT, then VORR of the two results.
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcordf32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %lhs = load <2 x float>* %A
  %rhs = load <2 x float>* %B
  %ordered = fcmp ord <2 x float> %lhs, %rhs
  ; Sign-extend each i1 lane to a full-width i32 lane mask.
  %mask = sext <2 x i1> %ordered to <2 x i32>
  ret <2 x i32> %mask
}
140