; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon | FileCheck %s

define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vcges8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.s8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vcges16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.s16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
  ret <4 x i16> %tmp4
}

define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vcges32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.s32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vcgeu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.u8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vcgeu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.u16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
  ret <4 x i16> %tmp4
}

define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vcgeu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.u32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vcgef32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vcge.f32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vcgeQs8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.s8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
  %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
  ret <16 x i8> %tmp4
}

define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vcgeQs16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.s16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vcgeQs32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.s32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vcgeQu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.u8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
  %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
  ret <16 x i8> %tmp4
}

define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vcgeQu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.u16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vcgeQu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.u32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vcgeQf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vcge.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vacgef32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vacge.f32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vacgeQf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vacge.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x i32> %tmp3
}

declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone

define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vcgei8Z:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcge.s8 d16, d16, #0
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vclei8Z:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcle.s8 d16, d16, #0
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

; Radar 8782191
; Floating-point comparisons against zero produce results with integer
; elements, not floating-point elements. A standalone sketch of the same
; pattern follows test_vclez_fp below.
define void @test_vclez_fp(<4 x float>* %A) nounwind optsize {
; CHECK-LABEL: test_vclez_fp:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vcle.f32 q8, q8, #0
; CHECK-NEXT:    vmovn.i32 d16, q8
; CHECK-NEXT:    vmov.i8 d17, #0x1
; CHECK-NEXT:    vuzp.8 d16, d18
; CHECK-NEXT:    vadd.i8 d16, d16, d17
; CHECK-NEXT:    vst1.8 {d16}, [r0]
entry:
  %ld = load <4 x float>, <4 x float>* %A
  %0 = fcmp ole <4 x float> %ld, zeroinitializer
  %1 = sext <4 x i1> %0 to <4 x i16>
  %2 = add <4 x i16> %1, zeroinitializer
  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* undef, <8 x i8> %5, i32 1)
  unreachable
}
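
; The function below is an added sketch, not part of the original autogenerated
; test: the name @vclez_f32_sketch is new and it deliberately carries no CHECK
; lines. It isolates the pattern described above in minimal form: an fcmp ole
; against zeroinitializer followed by a sext yields a <4 x i32> (integer
; elements), which is the kind of value the "vcle.f32 q8, q8, #0" immediate
; form checked in test_vclez_fp produces.
define <4 x i32> @vclez_f32_sketch(<4 x float>* %A) nounwind {
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = fcmp ole <4 x float> %tmp1, zeroinitializer
  %tmp3 = sext <4 x i1> %tmp2 to <4 x i32>
  ret <4 x i32> %tmp3
}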

declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind