1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon | FileCheck %s --check-prefixes=CHECK,ALLOC
3; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -regalloc=basic | FileCheck %s --check-prefixes=CHECK,BASIC
4
; Signed greater-than on <8 x i8>: icmp sgt + sext must select a single
; NEON vcgt.s8 on d-registers (operand order differs per register allocator).
define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; ALLOC-LABEL: vcgts8:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.s8 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgts8:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.s8 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
	ret <8 x i8> %tmp4
}
27
; Signed greater-than on <4 x i16>: expects vcgt.s16 on d-registers.
define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; ALLOC-LABEL: vcgts16:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.s16 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgts16:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.s16 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
	ret <4 x i16> %tmp4
}
50
; Signed greater-than on <2 x i32>: expects vcgt.s32 on d-registers.
define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; ALLOC-LABEL: vcgts32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.s32 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgts32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.s32 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}
73
; Unsigned greater-than on <8 x i8>: icmp ugt + sext must select vcgt.u8.
define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; ALLOC-LABEL: vcgtu8:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.u8 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtu8:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.u8 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
	ret <8 x i8> %tmp4
}
96
; Unsigned greater-than on <4 x i16>: expects vcgt.u16 on d-registers.
define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; ALLOC-LABEL: vcgtu16:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.u16 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtu16:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.u16 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
	ret <4 x i16> %tmp4
}
119
; Unsigned greater-than on <2 x i32>: expects vcgt.u32 on d-registers.
define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; ALLOC-LABEL: vcgtu32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.u32 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtu32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.u32 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}
142
; Float ordered greater-than on <2 x float>: fcmp ogt + sext must select
; vcgt.f32 on d-registers.
define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ALLOC-LABEL: vcgtf32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vcgt.f32 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtf32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vcgt.f32 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <2 x float>, <2 x float>* %A
	%tmp2 = load <2 x float>, <2 x float>* %B
	%tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
	ret <2 x i32> %tmp4
}
165
; 128-bit variant: signed greater-than on <16 x i8> expects vcgt.s8 on
; q-registers (result returned in r0-r3 via two vmovs).
define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; ALLOC-LABEL: vcgtQs8:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.s8 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQs8:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.s8 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
	%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
	ret <16 x i8> %tmp4
}
190
; 128-bit variant: signed greater-than on <8 x i16> expects vcgt.s16 on q-registers.
define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; ALLOC-LABEL: vcgtQs16:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.s16 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQs16:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.s16 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
	ret <8 x i16> %tmp4
}
215
; 128-bit variant: signed greater-than on <4 x i32> expects vcgt.s32 on q-registers.
define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; ALLOC-LABEL: vcgtQs32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.s32 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQs32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.s32 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
	ret <4 x i32> %tmp4
}
240
; 128-bit variant: unsigned greater-than on <16 x i8> expects vcgt.u8 on q-registers.
define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; ALLOC-LABEL: vcgtQu8:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.u8 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQu8:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.u8 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
	%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
	ret <16 x i8> %tmp4
}
265
; 128-bit variant: unsigned greater-than on <8 x i16> expects vcgt.u16 on q-registers.
define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; ALLOC-LABEL: vcgtQu16:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.u16 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQu16:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.u16 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
	ret <8 x i16> %tmp4
}
290
; 128-bit variant: unsigned greater-than on <4 x i32> expects vcgt.u32 on q-registers.
define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; ALLOC-LABEL: vcgtQu32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.u32 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQu32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.u32 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
	ret <4 x i32> %tmp4
}
315
; 128-bit variant: float ordered greater-than on <4 x float> expects
; vcgt.f32 on q-registers.
define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; ALLOC-LABEL: vcgtQf32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.f32 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgtQf32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.f32 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x float>, <4 x float>* %A
	%tmp2 = load <4 x float>, <4 x float>* %B
	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
	%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
	ret <4 x i32> %tmp4
}
340
; Absolute compare: the llvm.arm.neon.vacgt intrinsic (|a| > |b|) must lower
; to vacgt.f32 on d-registers.
define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ALLOC-LABEL: vacgtf32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vldr d16, [r1]
; ALLOC-NEXT:    vldr d17, [r0]
; ALLOC-NEXT:    vacgt.f32 d16, d17, d16
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vacgtf32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vldr d17, [r1]
; BASIC-NEXT:    vldr d16, [r0]
; BASIC-NEXT:    vacgt.f32 d16, d16, d17
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <2 x float>, <2 x float>* %A
	%tmp2 = load <2 x float>, <2 x float>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
	ret <2 x i32> %tmp3
}
362
; Absolute compare, 128-bit variant: llvm.arm.neon.vacgt must lower to
; vacgt.f32 on q-registers.
define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; ALLOC-LABEL: vacgtQf32:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vacgt.f32 q8, q9, q8
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vacgtQf32:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vacgt.f32 q8, q8, q9
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x float>, <4 x float>* %A
	%tmp2 = load <4 x float>, <4 x float>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
	ret <4 x i32> %tmp3
}
386
387; rdar://7923010
; zext (rather than sext) of the compare result: the all-ones vcgt mask must
; be masked down to 0/1 per lane with vand against a #0x1 splat.
define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
; ALLOC-LABEL: vcgt_zext:
; ALLOC:       @ %bb.0:
; ALLOC-NEXT:    vld1.64 {d16, d17}, [r1]
; ALLOC-NEXT:    vld1.64 {d18, d19}, [r0]
; ALLOC-NEXT:    vcgt.f32 q8, q9, q8
; ALLOC-NEXT:    vmov.i32 q9, #0x1
; ALLOC-NEXT:    vand q8, q8, q9
; ALLOC-NEXT:    vmov r0, r1, d16
; ALLOC-NEXT:    vmov r2, r3, d17
; ALLOC-NEXT:    mov pc, lr
;
; BASIC-LABEL: vcgt_zext:
; BASIC:       @ %bb.0:
; BASIC-NEXT:    vld1.64 {d18, d19}, [r1]
; BASIC-NEXT:    vld1.64 {d16, d17}, [r0]
; BASIC-NEXT:    vcgt.f32 q9, q8, q9
; BASIC-NEXT:    vmov.i32 q8, #0x1
; BASIC-NEXT:    vand q8, q9, q8
; BASIC-NEXT:    vmov r0, r1, d16
; BASIC-NEXT:    vmov r2, r3, d17
; BASIC-NEXT:    mov pc, lr
	%tmp1 = load <4 x float>, <4 x float>* %A
	%tmp2 = load <4 x float>, <4 x float>* %B
	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
	%tmp4 = zext <4 x i1> %tmp3 to <4 x i32>
	ret <4 x i32> %tmp4
}
416
417declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
418declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
419
; Compare against zero splat: icmp sgt with a zero vector must use the
; immediate form vcgt.s8 ..., #0 (single operand, no second load).
define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vcgti8Z:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vcgt.s8 d16, d16, #0
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
	ret <8 x i8> %tmp4
}
432
; Less-than-zero: icmp slt against a zero splat must use the immediate form
; vclt.s8 ..., #0 rather than materializing a zero vector.
define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vclti8Z:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vclt.s8 d16, d16, #0
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
	%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
	ret <8 x i8> %tmp4
}
445