; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; This tests fcmp operations that do not map directly to NEON instructions.

; une (unordered-or-not-equal) has no direct NEON compare: it is lowered as
; the complement of oeq, i.e. VCEQ followed by VMVN.
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunef32:
;CHECK: vceq.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; olt has no NEON "less-than" instruction: it is lowered as VCGT with the
; operands swapped.
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcoltf32:
;CHECK: vcgt.f32
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ole has no NEON "less-or-equal" instruction: it is lowered as VCGE with the
; operands swapped.
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcolef32:
;CHECK: vcge.f32
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; uge (unordered-or-greater-or-equal) is the complement of olt: lowered as
; VCGT (operands swapped) followed by VMVN.
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ule (unordered-or-less-or-equal) is the complement of ogt: lowered as
; VCGT followed by VMVN.
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vculef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ugt (unordered-or-greater-than) is the complement of ole: lowered as
; VCGE (operands swapped) followed by VMVN.
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugtf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ult (unordered-or-less-than) is the complement of oge: lowered as
; VCGE followed by VMVN.
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcultf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ueq (unordered-or-equal) is the complement of one: lowered as two VCGTs
; (both operand orders), VORR to combine, then VMVN to invert.
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcueqf32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; one (ordered-and-not-equal) is lowered as two VCGTs (both operand orders)
; combined with VORR.
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vconef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; uno (unordered) is the complement of ord: lowered as VCGE and VCGT combined
; with VORR, then inverted with VMVN.
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunof32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

; ord (ordered, neither operand NaN) is lowered as VCGE and VCGT combined
; with VORR.
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcordf32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}
