; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s

; This test checks the @llvm.cttz.* intrinsics for vectors.

declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1)
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1)
declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)

declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

;------------------------------------------------------------------------------
; cttz with the zero-is-defined semantics (second operand false).
; Sub-register-sized element types are only checked for a label, since their
; legalized lowering is not pinned down by this test.
;------------------------------------------------------------------------------

define void @test_v1i8(<1 x i8>* %p) {
; Trailing ':' added so the label cannot match a prefix of a longer symbol
; (e.g. test_v1i8_zero_undef), consistent with every other label in this file.
; CHECK-LABEL: test_v1i8:
  %a = load <1 x i8>, <1 x i8>* %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false)
  store <1 x i8> %tmp, <1 x i8>* %p
  ret void
}

define void @test_v2i8(<2 x i8>* %p) {
; CHECK-LABEL: test_v2i8:
  %a = load <2 x i8>, <2 x i8>* %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false)
  store <2 x i8> %tmp, <2 x i8>* %p
  ret void
}

define void @test_v4i8(<4 x i8>* %p) {
; CHECK-LABEL: test_v4i8:
  %a = load <4 x i8>, <4 x i8>* %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false)
  store <4 x i8> %tmp, <4 x i8>* %p
  ret void
}

; cttz(x) lowered as popcount((x & -x) - 1): isolate the lowest set bit,
; subtract one to get a mask of the trailing zeros, then count bits.
define void @test_v8i8(<8 x i8>* %p) {
; CHECK-LABEL: test_v8i8:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i8 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s8 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i8 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <8 x i8>, <8 x i8>* %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false)
  store <8 x i8> %tmp, <8 x i8>* %p
  ret void
}

define void @test_v16i8(<16 x i8>* %p) {
; CHECK-LABEL: test_v16i8:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i8 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s8 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i8 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <16 x i8>, <16 x i8>* %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %tmp, <16 x i8>* %p
  ret void
}

define void @test_v1i16(<1 x i16>* %p) {
; CHECK-LABEL: test_v1i16:
  %a = load <1 x i16>, <1 x i16>* %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false)
  store <1 x i16> %tmp, <1 x i16>* %p
  ret void
}

define void @test_v2i16(<2 x i16>* %p) {
; CHECK-LABEL: test_v2i16:
  %a = load <2 x i16>, <2 x i16>* %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false)
  store <2 x i16> %tmp, <2 x i16>* %p
  ret void
}

; For elements wider than i8, the byte popcount is widened back up with
; pairwise-add-long (vpaddl) steps.
define void @test_v4i16(<4 x i16>* %p) {
; CHECK-LABEL: test_v4i16:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i16 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s16 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i16 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <4 x i16>, <4 x i16>* %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false)
  store <4 x i16> %tmp, <4 x i16>* %p
  ret void
}

define void @test_v8i16(<8 x i16>* %p) {
; CHECK-LABEL: test_v8i16:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i16 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s16 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i16 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <8 x i16>, <8 x i16>* %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %tmp, <8 x i16>* %p
  ret void
}

define void @test_v1i32(<1 x i32>* %p) {
; CHECK-LABEL: test_v1i32:
  %a = load <1 x i32>, <1 x i32>* %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false)
  store <1 x i32> %tmp, <1 x i32>* %p
  ret void
}

define void @test_v2i32(<2 x i32>* %p) {
; CHECK-LABEL: test_v2i32:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s32 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i32 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <2 x i32>, <2 x i32>* %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
  store <2 x i32> %tmp, <2 x i32>* %p
  ret void
}

define void @test_v4i32(<4 x i32>* %p) {
; CHECK-LABEL: test_v4i32:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s32 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i32 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <4 x i32>, <4 x i32>* %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %tmp, <4 x i32>* %p
  ret void
}

; i64 elements have no vneg.s64, so -x is formed as (0 - x) and the
; trailing-zero mask as x + (-1) instead of x - 1.
define void @test_v1i64(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vpaddl.u32 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <1 x i64>, <1 x i64>* %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
  store <1 x i64> %tmp, <1 x i64>* %p
  ret void
}

define void @test_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <2 x i64>, <2 x i64>* %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %tmp, <2 x i64>* %p
  ret void
}

;------------------------------------------------------------------------------
; cttz with zero-undef semantics (second operand true): the input is known
; non-zero, which permits the vclz-based lowering for i16/i32 elements.
;------------------------------------------------------------------------------

define void @test_v1i8_zero_undef(<1 x i8>* %p) {
; Trailing ':' added for consistency and to prevent prefix matches.
; CHECK-LABEL: test_v1i8_zero_undef:
  %a = load <1 x i8>, <1 x i8>* %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true)
  store <1 x i8> %tmp, <1 x i8>* %p
  ret void
}

define void @test_v2i8_zero_undef(<2 x i8>* %p) {
; CHECK-LABEL: test_v2i8_zero_undef:
  %a = load <2 x i8>, <2 x i8>* %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
  store <2 x i8> %tmp, <2 x i8>* %p
  ret void
}

define void @test_v4i8_zero_undef(<4 x i8>* %p) {
; CHECK-LABEL: test_v4i8_zero_undef:
  %a = load <4 x i8>, <4 x i8>* %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
  store <4 x i8> %tmp, <4 x i8>* %p
  ret void
}

; i8 elements use the same popcount lowering as the zero-defined case.
define void @test_v8i8_zero_undef(<8 x i8>* %p) {
; CHECK-LABEL: test_v8i8_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i8 [[D2:d[0-9]+]], #0x1
; CHECK: vneg.s8 [[D3:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D3]]
; CHECK: vsub.i8 [[D1]], [[D1]], [[D2]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <8 x i8>, <8 x i8>* %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
  store <8 x i8> %tmp, <8 x i8>* %p
  ret void
}

define void @test_v16i8_zero_undef(<16 x i8>* %p) {
; CHECK-LABEL: test_v16i8_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i8 [[Q2:q[0-9]+]], #0x1
; CHECK: vneg.s8 [[Q3:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q3]]
; CHECK: vsub.i8 [[Q1]], [[Q1]], [[Q2]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <16 x i8>, <16 x i8>* %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %tmp, <16 x i8>* %p
  ret void
}

define void @test_v1i16_zero_undef(<1 x i16>* %p) {
; CHECK-LABEL: test_v1i16_zero_undef:
  %a = load <1 x i16>, <1 x i16>* %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true)
  store <1 x i16> %tmp, <1 x i16>* %p
  ret void
}

define void @test_v2i16_zero_undef(<2 x i16>* %p) {
; CHECK-LABEL: test_v2i16_zero_undef:
  %a = load <2 x i16>, <2 x i16>* %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
  store <2 x i16> %tmp, <2 x i16>* %p
  ret void
}

; Zero-undef i16: cttz(x) = 15 - clz(x & -x), using vclz instead of popcount.
define void @test_v4i16_zero_undef(<4 x i16>* %p) {
; CHECK-LABEL: test_v4i16_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vneg.s16 [[D2:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vmov.i16 [[D3:d[0-9]+]], #0xf
; CHECK: vclz.i16 [[D1]], [[D1]]
; CHECK: vsub.i16 [[D1]], [[D3]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <4 x i16>, <4 x i16>* %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
  store <4 x i16> %tmp, <4 x i16>* %p
  ret void
}

define void @test_v8i16_zero_undef(<8 x i16>* %p) {
; CHECK-LABEL: test_v8i16_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vneg.s16 [[Q2:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vmov.i16 [[Q3:q[0-9]+]], #0xf
; CHECK: vclz.i16 [[Q1]], [[Q1]]
; CHECK: vsub.i16 [[Q1]], [[Q3]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <8 x i16>, <8 x i16>* %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %tmp, <8 x i16>* %p
  ret void
}

define void @test_v1i32_zero_undef(<1 x i32>* %p) {
; CHECK-LABEL: test_v1i32_zero_undef:
  %a = load <1 x i32>, <1 x i32>* %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true)
  store <1 x i32> %tmp, <1 x i32>* %p
  ret void
}

; Zero-undef i32: cttz(x) = 31 - clz(x & -x).
define void @test_v2i32_zero_undef(<2 x i32>* %p) {
; CHECK-LABEL: test_v2i32_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vneg.s32 [[D2:d[0-9]+]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vmov.i32 [[D3:d[0-9]+]], #0x1f
; CHECK: vclz.i32 [[D1]], [[D1]]
; CHECK: vsub.i32 [[D1]], [[D3]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <2 x i32>, <2 x i32>* %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
  store <2 x i32> %tmp, <2 x i32>* %p
  ret void
}

define void @test_v4i32_zero_undef(<4 x i32>* %p) {
; CHECK-LABEL: test_v4i32_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vneg.s32 [[Q2:q[0-9]+]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vmov.i32 [[Q3:q[0-9]+]], #0x1f
; CHECK: vclz.i32 [[Q1]], [[Q1]]
; CHECK: vsub.i32 [[Q1]], [[Q3]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <4 x i32>, <4 x i32>* %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %tmp, <4 x i32>* %p
  ret void
}

; i64 has no vclz, so zero-undef falls back to the same popcount lowering
; as the zero-defined case.
define void @test_v1i64_zero_undef(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vpaddl.u32 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
  %a = load <1 x i64>, <1 x i64>* %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
  store <1 x i64> %tmp, <1 x i64>* %p
  ret void
}

define void @test_v2i64_zero_undef(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
  %a = load <2 x i64>, <2 x i64>* %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %tmp, <2 x i64>* %p
  ret void
}