; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Code-generation test for three NEON per-element bit-counting intrinsics:
;   llvm.arm.neon.vcnt - expected to select VCNT (count set bits)
;   llvm.arm.neon.vclz - expected to select VCLZ (count leading zeros)
;   llvm.arm.neon.vcls - expected to select VCLS (count leading sign bits)
;
; Every function loads one vector through its pointer argument, applies the
; intrinsic once and returns the result.  Functions with a "Q" in the name
; operate on 128-bit vectors, the others on 64-bit vectors.  The FileCheck
; patterns pin the operand register class (d-registers for 64-bit vectors,
; q-registers for 128-bit vectors) so that a 64-bit test and its 128-bit
; counterpart cannot both be satisfied by the same instruction.

;----------------------------------------------------------------------------
; vcnt: only defined for i8 elements.
;----------------------------------------------------------------------------

define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK: vcnt8:
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp2
}

define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK: vcntQ8:
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone

;----------------------------------------------------------------------------
; vclz: i8, i16 and i32 elements; 64-bit vectors first, then 128-bit.
;----------------------------------------------------------------------------

define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK: vclz8:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK: vclz16:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK: vclz32:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp2
}

define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK: vclzQ8:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK: vclzQ16:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK: vclzQ32:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone

;----------------------------------------------------------------------------
; vcls: i8, i16 and i32 elements; the expected mnemonic carries a signed
; type suffix (.s8/.s16/.s32).  64-bit vectors first, then 128-bit.
;----------------------------------------------------------------------------

define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK: vclss8:
;CHECK: vcls.s8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
;CHECK: vclss16:
;CHECK: vcls.s16 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
;CHECK: vclss32:
;CHECK: vcls.s32 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp2
}

define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
;CHECK: vclsQs8:
;CHECK: vcls.s8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
;CHECK: vclsQs16:
;CHECK: vcls.s16 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
;CHECK: vclsQs32:
;CHECK: vcls.s32 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone