1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s
3
4; This test checks the @llvm.cttz.* intrinsics for vectors.
5
; Declarations of the vector count-trailing-zeros intrinsics exercised below.
; Each takes the vector operand plus an i1 flag; the tests pass `false` in the
; first group and `true` in the *_zero_undef group.
; i8 element vectors: 1, 2, 4, 8 and 16 lanes.
6declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1)
7declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
8declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
9declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
10declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
11
; i16 element vectors: 1, 2, 4 and 8 lanes.
12declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1)
13declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
14declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
15declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
16
; i32 element vectors: 1, 2 and 4 lanes.
17declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1)
18declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
19declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
20
; i64 element vectors: 1 and 2 lanes.
21declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
22declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
23
24;------------------------------------------------------------------------------
25
; cttz of a <1 x i8> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output is scalar code: bit 8 is ORed in
; (#256) before the rbit + clz pair, then the low byte is stored.
26define void @test_v1i8(<1 x i8>* %p) {
27; CHECK-LABEL: test_v1i8:
28; CHECK:       @ %bb.0:
29; CHECK-NEXT:    ldrb r1, [r0]
30; CHECK-NEXT:    orr r1, r1, #256
31; CHECK-NEXT:    rbit r1, r1
32; CHECK-NEXT:    clz r1, r1
33; CHECK-NEXT:    strb r1, [r0]
34; CHECK-NEXT:    bx lr
35  %a = load <1 x i8>, <1 x i8>* %p
36  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false)
37  store <1 x i8> %tmp, <1 x i8>* %p
38  ret void
39}
40
; cttz of a <2 x i8> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output widens the lanes to 32 bits
; (vmovl.u8, vmovl.u16), ORs in bit 8 (#0x100), isolates the lowest set bit
; with x & -x (vneg + vand) and computes 31 - clz. The two result bytes are
; extracted to core registers and stored individually.
41define void @test_v2i8(<2 x i8>* %p) {
42; CHECK-LABEL: test_v2i8:
43; CHECK:       @ %bb.0:
44; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
45; CHECK-NEXT:    vmovl.u8 q8, d16
46; CHECK-NEXT:    vmovl.u16 q8, d16
47; CHECK-NEXT:    vorr.i32 d16, #0x100
48; CHECK-NEXT:    vneg.s32 d18, d16
49; CHECK-NEXT:    vand d16, d16, d18
50; CHECK-NEXT:    vmov.i32 d17, #0x1f
51; CHECK-NEXT:    vclz.i32 d16, d16
52; CHECK-NEXT:    vsub.i32 d16, d17, d16
53; CHECK-NEXT:    vmov.32 r1, d16[1]
54; CHECK-NEXT:    vmov.32 r2, d16[0]
55; CHECK-NEXT:    strb r1, [r0, #1]
56; CHECK-NEXT:    strb r2, [r0]
57; CHECK-NEXT:    bx lr
58  %a = load <2 x i8>, <2 x i8>* %p
59  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false)
60  store <2 x i8> %tmp, <2 x i8>* %p
61  ret void
62}
63
; cttz of a <4 x i8> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output widens the lanes to 16 bits
; (vmovl.u8), ORs in bit 8 (#0x100), then takes the population count of
; (x & -x) - 1 via vcnt.8 + vpaddl.u8. vuzp.8 narrows the result to bytes
; before the 32-bit store.
64define void @test_v4i8(<4 x i8>* %p) {
65; CHECK-LABEL: test_v4i8:
66; CHECK:       @ %bb.0:
67; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
68; CHECK-NEXT:    vmov.i16 d19, #0x1
69; CHECK-NEXT:    vmovl.u8 q8, d16
70; CHECK-NEXT:    vorr.i16 d16, #0x100
71; CHECK-NEXT:    vneg.s16 d18, d16
72; CHECK-NEXT:    vand d16, d16, d18
73; CHECK-NEXT:    vsub.i16 d16, d16, d19
74; CHECK-NEXT:    vcnt.8 d16, d16
75; CHECK-NEXT:    vpaddl.u8 d16, d16
76; CHECK-NEXT:    vuzp.8 d16, d17
77; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
78; CHECK-NEXT:    bx lr
79  %a = load <4 x i8>, <4 x i8>* %p
80  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false)
81  store <4 x i8> %tmp, <4 x i8>* %p
82  ret void
83}
84
; cttz of a <8 x i8> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one 64-bit D register and
; takes the per-byte population count (vcnt.8) of (x & -x) - 1.
85define void @test_v8i8(<8 x i8>* %p) {
86; CHECK-LABEL: test_v8i8:
87; CHECK:       @ %bb.0:
88; CHECK-NEXT:    vldr d16, [r0]
89; CHECK-NEXT:    vmov.i8 d18, #0x1
90; CHECK-NEXT:    vneg.s8 d17, d16
91; CHECK-NEXT:    vand d16, d16, d17
92; CHECK-NEXT:    vsub.i8 d16, d16, d18
93; CHECK-NEXT:    vcnt.8 d16, d16
94; CHECK-NEXT:    vstr d16, [r0]
95; CHECK-NEXT:    bx lr
96  %a = load <8 x i8>, <8 x i8>* %p
97  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false)
98  store <8 x i8> %tmp, <8 x i8>* %p
99  ret void
100}
101
; cttz of a <16 x i8> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one 128-bit Q register and
; takes the per-byte population count (vcnt.8) of (x & -x) - 1.
102define void @test_v16i8(<16 x i8>* %p) {
103; CHECK-LABEL: test_v16i8:
104; CHECK:       @ %bb.0:
105; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
106; CHECK-NEXT:    vmov.i8 q10, #0x1
107; CHECK-NEXT:    vneg.s8 q9, q8
108; CHECK-NEXT:    vand q8, q8, q9
109; CHECK-NEXT:    vsub.i8 q8, q8, q10
110; CHECK-NEXT:    vcnt.8 q8, q8
111; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
112; CHECK-NEXT:    bx lr
113  %a = load <16 x i8>, <16 x i8>* %p
114  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
115  store <16 x i8> %tmp, <16 x i8>* %p
116  ret void
117}
118
; cttz of a <1 x i16> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output is scalar code: bit 16 is ORed in
; (#65536) before the rbit + clz pair, then the low halfword is stored.
119define void @test_v1i16(<1 x i16>* %p) {
120; CHECK-LABEL: test_v1i16:
121; CHECK:       @ %bb.0:
122; CHECK-NEXT:    ldrh r1, [r0]
123; CHECK-NEXT:    orr r1, r1, #65536
124; CHECK-NEXT:    rbit r1, r1
125; CHECK-NEXT:    clz r1, r1
126; CHECK-NEXT:    strh r1, [r0]
127; CHECK-NEXT:    bx lr
128  %a = load <1 x i16>, <1 x i16>* %p
129  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false)
130  store <1 x i16> %tmp, <1 x i16>* %p
131  ret void
132}
133
; cttz of a <2 x i16> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output widens the lanes to 32 bits
; (vmovl.u16), ORs in bit 16 (#0x10000), computes 31 - clz(x & -x), and
; narrows back with vuzp.16 before the 32-bit store.
134define void @test_v2i16(<2 x i16>* %p) {
135; CHECK-LABEL: test_v2i16:
136; CHECK:       @ %bb.0:
137; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
138; CHECK-NEXT:    vmovl.u16 q8, d16
139; CHECK-NEXT:    vorr.i32 d16, #0x10000
140; CHECK-NEXT:    vneg.s32 d18, d16
141; CHECK-NEXT:    vand d16, d16, d18
142; CHECK-NEXT:    vmov.i32 d17, #0x1f
143; CHECK-NEXT:    vclz.i32 d16, d16
144; CHECK-NEXT:    vsub.i32 d16, d17, d16
145; CHECK-NEXT:    vuzp.16 d16, d17
146; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
147; CHECK-NEXT:    bx lr
148  %a = load <2 x i16>, <2 x i16>* %p
149  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false)
150  store <2 x i16> %tmp, <2 x i16>* %p
151  ret void
152}
153
; cttz of a <4 x i16> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one D register: population
; count of (x & -x) - 1, built from vcnt.8 followed by one pairwise widening
; add (vpaddl.u8) to get 16-bit lane totals.
154define void @test_v4i16(<4 x i16>* %p) {
155; CHECK-LABEL: test_v4i16:
156; CHECK:       @ %bb.0:
157; CHECK-NEXT:    vldr d16, [r0]
158; CHECK-NEXT:    vmov.i16 d18, #0x1
159; CHECK-NEXT:    vneg.s16 d17, d16
160; CHECK-NEXT:    vand d16, d16, d17
161; CHECK-NEXT:    vsub.i16 d16, d16, d18
162; CHECK-NEXT:    vcnt.8 d16, d16
163; CHECK-NEXT:    vpaddl.u8 d16, d16
164; CHECK-NEXT:    vstr d16, [r0]
165; CHECK-NEXT:    bx lr
166  %a = load <4 x i16>, <4 x i16>* %p
167  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false)
168  store <4 x i16> %tmp, <4 x i16>* %p
169  ret void
170}
171
; cttz of a <8 x i16> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one Q register: population
; count of (x & -x) - 1, built from vcnt.8 followed by one pairwise widening
; add (vpaddl.u8) to get 16-bit lane totals.
172define void @test_v8i16(<8 x i16>* %p) {
173; CHECK-LABEL: test_v8i16:
174; CHECK:       @ %bb.0:
175; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
176; CHECK-NEXT:    vmov.i16 q10, #0x1
177; CHECK-NEXT:    vneg.s16 q9, q8
178; CHECK-NEXT:    vand q8, q8, q9
179; CHECK-NEXT:    vsub.i16 q8, q8, q10
180; CHECK-NEXT:    vcnt.8 q8, q8
181; CHECK-NEXT:    vpaddl.u8 q8, q8
182; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
183; CHECK-NEXT:    bx lr
184  %a = load <8 x i16>, <8 x i16>* %p
185  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
186  store <8 x i16> %tmp, <8 x i16>* %p
187  ret void
188}
189
; cttz of a <1 x i32> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output is a plain scalar rbit + clz pair on
; the loaded word, with no extra OR.
190define void @test_v1i32(<1 x i32>* %p) {
191; CHECK-LABEL: test_v1i32:
192; CHECK:       @ %bb.0:
193; CHECK-NEXT:    ldr r1, [r0]
194; CHECK-NEXT:    rbit r1, r1
195; CHECK-NEXT:    clz r1, r1
196; CHECK-NEXT:    str r1, [r0]
197; CHECK-NEXT:    bx lr
198  %a = load <1 x i32>, <1 x i32>* %p
199  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false)
200  store <1 x i32> %tmp, <1 x i32>* %p
201  ret void
202}
203
; cttz of a <2 x i32> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one D register: population
; count of (x & -x) - 1, built from vcnt.8 followed by two pairwise widening
; adds (vpaddl.u8, vpaddl.u16) to get 32-bit lane totals.
204define void @test_v2i32(<2 x i32>* %p) {
205; CHECK-LABEL: test_v2i32:
206; CHECK:       @ %bb.0:
207; CHECK-NEXT:    vldr d16, [r0]
208; CHECK-NEXT:    vmov.i32 d18, #0x1
209; CHECK-NEXT:    vneg.s32 d17, d16
210; CHECK-NEXT:    vand d16, d16, d17
211; CHECK-NEXT:    vsub.i32 d16, d16, d18
212; CHECK-NEXT:    vcnt.8 d16, d16
213; CHECK-NEXT:    vpaddl.u8 d16, d16
214; CHECK-NEXT:    vpaddl.u16 d16, d16
215; CHECK-NEXT:    vstr d16, [r0]
216; CHECK-NEXT:    bx lr
217  %a = load <2 x i32>, <2 x i32>* %p
218  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
219  store <2 x i32> %tmp, <2 x i32>* %p
220  ret void
221}
222
; cttz of a <4 x i32> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one Q register: population
; count of (x & -x) - 1, built from vcnt.8 followed by two pairwise widening
; adds (vpaddl.u8, vpaddl.u16) to get 32-bit lane totals.
223define void @test_v4i32(<4 x i32>* %p) {
224; CHECK-LABEL: test_v4i32:
225; CHECK:       @ %bb.0:
226; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
227; CHECK-NEXT:    vmov.i32 q10, #0x1
228; CHECK-NEXT:    vneg.s32 q9, q8
229; CHECK-NEXT:    vand q8, q8, q9
230; CHECK-NEXT:    vsub.i32 q8, q8, q10
231; CHECK-NEXT:    vcnt.8 q8, q8
232; CHECK-NEXT:    vpaddl.u8 q8, q8
233; CHECK-NEXT:    vpaddl.u16 q8, q8
234; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
235; CHECK-NEXT:    bx lr
236  %a = load <4 x i32>, <4 x i32>* %p
237  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
238  store <4 x i32> %tmp, <4 x i32>* %p
239  ret void
240}
241
; cttz of a <1 x i64> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one D register: the negation
; is formed as 0 - x (vsub.i64 from a zeroed register), the -1 is applied by
; adding an all-ones constant, and the population count is vcnt.8 followed by
; three pairwise widening adds (u8, u16, u32) to reach a 64-bit total.
242define void @test_v1i64(<1 x i64>* %p) {
243; CHECK-LABEL: test_v1i64:
244; CHECK:       @ %bb.0:
245; CHECK-NEXT:    vmov.i32 d16, #0x0
246; CHECK-NEXT:    vldr d17, [r0]
247; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
248; CHECK-NEXT:    vsub.i64 d16, d16, d17
249; CHECK-NEXT:    vand d16, d17, d16
250; CHECK-NEXT:    vadd.i64 d16, d16, d18
251; CHECK-NEXT:    vcnt.8 d16, d16
252; CHECK-NEXT:    vpaddl.u8 d16, d16
253; CHECK-NEXT:    vpaddl.u16 d16, d16
254; CHECK-NEXT:    vpaddl.u32 d16, d16
255; CHECK-NEXT:    vstr d16, [r0]
256; CHECK-NEXT:    bx lr
257  %a = load <1 x i64>, <1 x i64>* %p
258  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
259  store <1 x i64> %tmp, <1 x i64>* %p
260  ret void
261}
262
; cttz of a <2 x i64> loaded from %p with the flag set to false; the result is
; stored back to %p. The expected output works on one Q register: the negation
; is formed as 0 - x (vsub.i64 from a zeroed register), the -1 is applied by
; adding an all-ones constant, and the population count is vcnt.8 followed by
; three pairwise widening adds (u8, u16, u32) to reach 64-bit lane totals.
263define void @test_v2i64(<2 x i64>* %p) {
264; CHECK-LABEL: test_v2i64:
265; CHECK:       @ %bb.0:
266; CHECK-NEXT:    vmov.i32 q8, #0x0
267; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
268; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
269; CHECK-NEXT:    vsub.i64 q8, q8, q9
270; CHECK-NEXT:    vand q8, q9, q8
271; CHECK-NEXT:    vadd.i64 q8, q8, q10
272; CHECK-NEXT:    vcnt.8 q8, q8
273; CHECK-NEXT:    vpaddl.u8 q8, q8
274; CHECK-NEXT:    vpaddl.u16 q8, q8
275; CHECK-NEXT:    vpaddl.u32 q8, q8
276; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
277; CHECK-NEXT:    bx lr
278  %a = load <2 x i64>, <2 x i64>* %p
279  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
280  store <2 x i64> %tmp, <2 x i64>* %p
281  ret void
282}
283
284;------------------------------------------------------------------------------
285
; cttz of a <1 x i8> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output is a scalar rbit + clz pair applied
; directly to the loaded byte, with no OR of a guard bit beforehand.
286define void @test_v1i8_zero_undef(<1 x i8>* %p) {
287; CHECK-LABEL: test_v1i8_zero_undef:
288; CHECK:       @ %bb.0:
289; CHECK-NEXT:    ldrb r1, [r0]
290; CHECK-NEXT:    rbit r1, r1
291; CHECK-NEXT:    clz r1, r1
292; CHECK-NEXT:    strb r1, [r0]
293; CHECK-NEXT:    bx lr
294  %a = load <1 x i8>, <1 x i8>* %p
295  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true)
296  store <1 x i8> %tmp, <1 x i8>* %p
297  ret void
298}
299
; cttz of a <2 x i8> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output widens the lanes to 32 bits
; (vmovl.u8, vmovl.u16) and computes 31 - clz(x & -x) with no vorr of a guard
; bit. The two result bytes are extracted to core registers and stored
; individually.
300define void @test_v2i8_zero_undef(<2 x i8>* %p) {
301; CHECK-LABEL: test_v2i8_zero_undef:
302; CHECK:       @ %bb.0:
303; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
304; CHECK-NEXT:    vmovl.u8 q8, d16
305; CHECK-NEXT:    vmovl.u16 q8, d16
306; CHECK-NEXT:    vneg.s32 d18, d16
307; CHECK-NEXT:    vand d16, d16, d18
308; CHECK-NEXT:    vmov.i32 d17, #0x1f
309; CHECK-NEXT:    vclz.i32 d16, d16
310; CHECK-NEXT:    vsub.i32 d16, d17, d16
311; CHECK-NEXT:    vmov.32 r1, d16[1]
312; CHECK-NEXT:    vmov.32 r2, d16[0]
313; CHECK-NEXT:    strb r1, [r0, #1]
314; CHECK-NEXT:    strb r2, [r0]
315; CHECK-NEXT:    bx lr
316  %a = load <2 x i8>, <2 x i8>* %p
317  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
318  store <2 x i8> %tmp, <2 x i8>* %p
319  ret void
320}
321
; cttz of a <4 x i8> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output widens the lanes to 16 bits
; (vmovl.u8) and computes 15 - clz(x & -x) with vclz.i16, then narrows with
; vuzp.8 before the 32-bit store. No guard bit is ORed in.
322define void @test_v4i8_zero_undef(<4 x i8>* %p) {
323; CHECK-LABEL: test_v4i8_zero_undef:
324; CHECK:       @ %bb.0:
325; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
326; CHECK-NEXT:    vmovl.u8 q8, d16
327; CHECK-NEXT:    vneg.s16 d18, d16
328; CHECK-NEXT:    vand d16, d16, d18
329; CHECK-NEXT:    vmov.i16 d17, #0xf
330; CHECK-NEXT:    vclz.i16 d16, d16
331; CHECK-NEXT:    vsub.i16 d16, d17, d16
332; CHECK-NEXT:    vuzp.8 d16, d17
333; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
334; CHECK-NEXT:    bx lr
335  %a = load <4 x i8>, <4 x i8>* %p
336  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
337  store <4 x i8> %tmp, <4 x i8>* %p
338  ret void
339}
340
; cttz of a <8 x i8> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one D register and takes the
; per-byte population count (vcnt.8) of (x & -x) - 1, the same instruction
; sequence expected for test_v8i8.
341define void @test_v8i8_zero_undef(<8 x i8>* %p) {
342; CHECK-LABEL: test_v8i8_zero_undef:
343; CHECK:       @ %bb.0:
344; CHECK-NEXT:    vldr d16, [r0]
345; CHECK-NEXT:    vmov.i8 d18, #0x1
346; CHECK-NEXT:    vneg.s8 d17, d16
347; CHECK-NEXT:    vand d16, d16, d17
348; CHECK-NEXT:    vsub.i8 d16, d16, d18
349; CHECK-NEXT:    vcnt.8 d16, d16
350; CHECK-NEXT:    vstr d16, [r0]
351; CHECK-NEXT:    bx lr
352  %a = load <8 x i8>, <8 x i8>* %p
353  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
354  store <8 x i8> %tmp, <8 x i8>* %p
355  ret void
356}
357
; cttz of a <16 x i8> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one Q register and takes the
; per-byte population count (vcnt.8) of (x & -x) - 1, the same instruction
; sequence expected for test_v16i8.
358define void @test_v16i8_zero_undef(<16 x i8>* %p) {
359; CHECK-LABEL: test_v16i8_zero_undef:
360; CHECK:       @ %bb.0:
361; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
362; CHECK-NEXT:    vmov.i8 q10, #0x1
363; CHECK-NEXT:    vneg.s8 q9, q8
364; CHECK-NEXT:    vand q8, q8, q9
365; CHECK-NEXT:    vsub.i8 q8, q8, q10
366; CHECK-NEXT:    vcnt.8 q8, q8
367; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
368; CHECK-NEXT:    bx lr
369  %a = load <16 x i8>, <16 x i8>* %p
370  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
371  store <16 x i8> %tmp, <16 x i8>* %p
372  ret void
373}
374
; cttz of a <1 x i16> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output is a scalar rbit + clz pair applied
; directly to the loaded halfword, with no OR of a guard bit beforehand.
375define void @test_v1i16_zero_undef(<1 x i16>* %p) {
376; CHECK-LABEL: test_v1i16_zero_undef:
377; CHECK:       @ %bb.0:
378; CHECK-NEXT:    ldrh r1, [r0]
379; CHECK-NEXT:    rbit r1, r1
380; CHECK-NEXT:    clz r1, r1
381; CHECK-NEXT:    strh r1, [r0]
382; CHECK-NEXT:    bx lr
383  %a = load <1 x i16>, <1 x i16>* %p
384  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true)
385  store <1 x i16> %tmp, <1 x i16>* %p
386  ret void
387}
388
; cttz of a <2 x i16> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output widens the lanes to 32 bits
; (vmovl.u16), computes 31 - clz(x & -x) with no vorr of a guard bit, and
; narrows back with vuzp.16 before the 32-bit store.
389define void @test_v2i16_zero_undef(<2 x i16>* %p) {
390; CHECK-LABEL: test_v2i16_zero_undef:
391; CHECK:       @ %bb.0:
392; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
393; CHECK-NEXT:    vmovl.u16 q8, d16
394; CHECK-NEXT:    vneg.s32 d18, d16
395; CHECK-NEXT:    vand d16, d16, d18
396; CHECK-NEXT:    vmov.i32 d17, #0x1f
397; CHECK-NEXT:    vclz.i32 d16, d16
398; CHECK-NEXT:    vsub.i32 d16, d17, d16
399; CHECK-NEXT:    vuzp.16 d16, d17
400; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
401; CHECK-NEXT:    bx lr
402  %a = load <2 x i16>, <2 x i16>* %p
403  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
404  store <2 x i16> %tmp, <2 x i16>* %p
405  ret void
406}
407
; cttz of a <4 x i16> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one D register and computes
; 15 - clz(x & -x) using vclz.i16, instead of a vcnt-based population count.
408define void @test_v4i16_zero_undef(<4 x i16>* %p) {
409; CHECK-LABEL: test_v4i16_zero_undef:
410; CHECK:       @ %bb.0:
411; CHECK-NEXT:    vldr d16, [r0]
412; CHECK-NEXT:    vneg.s16 d17, d16
413; CHECK-NEXT:    vand d16, d16, d17
414; CHECK-NEXT:    vmov.i16 d17, #0xf
415; CHECK-NEXT:    vclz.i16 d16, d16
416; CHECK-NEXT:    vsub.i16 d16, d17, d16
417; CHECK-NEXT:    vstr d16, [r0]
418; CHECK-NEXT:    bx lr
419  %a = load <4 x i16>, <4 x i16>* %p
420  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
421  store <4 x i16> %tmp, <4 x i16>* %p
422  ret void
423}
424
; cttz of a <8 x i16> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one Q register and computes
; 15 - clz(x & -x) using vclz.i16, instead of a vcnt-based population count.
425define void @test_v8i16_zero_undef(<8 x i16>* %p) {
426; CHECK-LABEL: test_v8i16_zero_undef:
427; CHECK:       @ %bb.0:
428; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
429; CHECK-NEXT:    vneg.s16 q9, q8
430; CHECK-NEXT:    vand q8, q8, q9
431; CHECK-NEXT:    vmov.i16 q9, #0xf
432; CHECK-NEXT:    vclz.i16 q8, q8
433; CHECK-NEXT:    vsub.i16 q8, q9, q8
434; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
435; CHECK-NEXT:    bx lr
436  %a = load <8 x i16>, <8 x i16>* %p
437  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
438  store <8 x i16> %tmp, <8 x i16>* %p
439  ret void
440}
441
; cttz of a <1 x i32> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output is a scalar rbit + clz pair on the
; loaded word, the same instruction sequence expected for test_v1i32.
442define void @test_v1i32_zero_undef(<1 x i32>* %p) {
443; CHECK-LABEL: test_v1i32_zero_undef:
444; CHECK:       @ %bb.0:
445; CHECK-NEXT:    ldr r1, [r0]
446; CHECK-NEXT:    rbit r1, r1
447; CHECK-NEXT:    clz r1, r1
448; CHECK-NEXT:    str r1, [r0]
449; CHECK-NEXT:    bx lr
450  %a = load <1 x i32>, <1 x i32>* %p
451  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true)
452  store <1 x i32> %tmp, <1 x i32>* %p
453  ret void
454}
455
; cttz of a <2 x i32> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one D register and computes
; 31 - clz(x & -x) using vclz.i32, instead of a vcnt-based population count.
456define void @test_v2i32_zero_undef(<2 x i32>* %p) {
457; CHECK-LABEL: test_v2i32_zero_undef:
458; CHECK:       @ %bb.0:
459; CHECK-NEXT:    vldr d16, [r0]
460; CHECK-NEXT:    vneg.s32 d17, d16
461; CHECK-NEXT:    vand d16, d16, d17
462; CHECK-NEXT:    vmov.i32 d17, #0x1f
463; CHECK-NEXT:    vclz.i32 d16, d16
464; CHECK-NEXT:    vsub.i32 d16, d17, d16
465; CHECK-NEXT:    vstr d16, [r0]
466; CHECK-NEXT:    bx lr
467  %a = load <2 x i32>, <2 x i32>* %p
468  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
469  store <2 x i32> %tmp, <2 x i32>* %p
470  ret void
471}
472
; cttz of a <4 x i32> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one Q register and computes
; 31 - clz(x & -x) using vclz.i32, instead of a vcnt-based population count.
473define void @test_v4i32_zero_undef(<4 x i32>* %p) {
474; CHECK-LABEL: test_v4i32_zero_undef:
475; CHECK:       @ %bb.0:
476; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
477; CHECK-NEXT:    vneg.s32 q9, q8
478; CHECK-NEXT:    vand q8, q8, q9
479; CHECK-NEXT:    vmov.i32 q9, #0x1f
480; CHECK-NEXT:    vclz.i32 q8, q8
481; CHECK-NEXT:    vsub.i32 q8, q9, q8
482; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
483; CHECK-NEXT:    bx lr
484  %a = load <4 x i32>, <4 x i32>* %p
485  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
486  store <4 x i32> %tmp, <4 x i32>* %p
487  ret void
488}
489
; cttz of a <1 x i64> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one D register: population
; count of (x & (0 - x)) + all-ones via vcnt.8 and three pairwise widening
; adds, the same instruction sequence expected for test_v1i64.
490define void @test_v1i64_zero_undef(<1 x i64>* %p) {
491; CHECK-LABEL: test_v1i64_zero_undef:
492; CHECK:       @ %bb.0:
493; CHECK-NEXT:    vmov.i32 d16, #0x0
494; CHECK-NEXT:    vldr d17, [r0]
495; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
496; CHECK-NEXT:    vsub.i64 d16, d16, d17
497; CHECK-NEXT:    vand d16, d17, d16
498; CHECK-NEXT:    vadd.i64 d16, d16, d18
499; CHECK-NEXT:    vcnt.8 d16, d16
500; CHECK-NEXT:    vpaddl.u8 d16, d16
501; CHECK-NEXT:    vpaddl.u16 d16, d16
502; CHECK-NEXT:    vpaddl.u32 d16, d16
503; CHECK-NEXT:    vstr d16, [r0]
504; CHECK-NEXT:    bx lr
505  %a = load <1 x i64>, <1 x i64>* %p
506  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
507  store <1 x i64> %tmp, <1 x i64>* %p
508  ret void
509}
510
; cttz of a <2 x i64> loaded from %p with the flag set to true; the result is
; stored back to %p. The expected output works on one Q register: population
; count of (x & (0 - x)) + all-ones via vcnt.8 and three pairwise widening
; adds, the same instruction sequence expected for test_v2i64.
511define void @test_v2i64_zero_undef(<2 x i64>* %p) {
512; CHECK-LABEL: test_v2i64_zero_undef:
513; CHECK:       @ %bb.0:
514; CHECK-NEXT:    vmov.i32 q8, #0x0
515; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
516; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
517; CHECK-NEXT:    vsub.i64 q8, q8, q9
518; CHECK-NEXT:    vand q8, q9, q8
519; CHECK-NEXT:    vadd.i64 q8, q8, q10
520; CHECK-NEXT:    vcnt.8 q8, q8
521; CHECK-NEXT:    vpaddl.u8 q8, q8
522; CHECK-NEXT:    vpaddl.u16 q8, q8
523; CHECK-NEXT:    vpaddl.u32 q8, q8
524; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
525; CHECK-NEXT:    bx lr
526  %a = load <2 x i64>, <2 x i64>* %p
527  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
528  store <2 x i64> %tmp, <2 x i64>* %p
529  ret void
530}
531