; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Codegen tests for the MVE across-vector min/max reduction intrinsics
; (vminv/vmaxv, absolute vminav/vmaxav, and FP vminnmv/vmaxnmv/vminnmav/
; vmaxnmav), in both unpredicated and predicated (_p) forms.

define arm_aapcs_vfpcc signext i8 @test_vminvq_s8(i8 signext %a, <16 x i8> %b) {
; CHECK-LABEL: test_vminvq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminv.s8 r0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 0)
  %2 = trunc i32 %1 to i8
  ret i8 %2
}

define arm_aapcs_vfpcc signext i16 @test_vminvq_s16(i16 signext %a, <8 x i16> %b) {
; CHECK-LABEL: test_vminvq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminv.s16 r0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc i32 @test_vminvq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vminvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminv.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc zeroext i8 @test_vminvq_u8(i8 zeroext %a, <16 x i8> %b) {
; CHECK-LABEL: test_vminvq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminv.u8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 1)
  %2 = trunc i32 %1 to i8
  ret i8 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vminvq_u16(i16 zeroext %a, <8 x i16> %b) {
; CHECK-LABEL: test_vminvq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminv.u16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 1)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc i32 @test_vminvq_u32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vminvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc signext i8 @test_vmaxvq_s8(i8 signext %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmaxvq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxv.s8 r0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 0)
  %2 = trunc i32 %1 to i8
  ret i8 %2
}

define arm_aapcs_vfpcc signext i16 @test_vmaxvq_s16(i16 signext %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmaxvq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxv.s16 r0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc i32 @test_vmaxvq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmaxvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxv.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_u8(i8 zeroext %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmaxvq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxv.u8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 1)
  %2 = trunc i32 %1 to i8
  ret i8 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_u16(i16 zeroext %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmaxvq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxv.u16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 1)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc i32 @test_vmaxvq_u32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmaxvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc zeroext i8 @test_vminavq_s8(i8 zeroext %a, <16 x i8> %b) {
; CHECK-LABEL: test_vminavq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminav.s8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = tail call i32 @llvm.arm.mve.minav.v16i8(i32 %0, <16 x i8> %b)
  %2 = trunc i32 %1 to i8
  ret i8 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vminavq_s16(i16 zeroext %a, <8 x i16> %b) {
; CHECK-LABEL: test_vminavq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminav.s16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.arm.mve.minav.v8i16(i32 %0, <8 x i16> %b)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc i32 @test_vminavq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vminavq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminav.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.minav.v4i32(i32 %a, <4 x i32> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_s8(i8 zeroext %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmaxavq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxav.s8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = tail call i32 @llvm.arm.mve.maxav.v16i8(i32 %0, <16 x i8> %b)
  %2 = trunc i32 %1 to i8
  ret i8 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_s16(i16 zeroext %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmaxavq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxav.s16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = tail call i32 @llvm.arm.mve.maxav.v8i16(i32 %0, <8 x i16> %b)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc i32 @test_vmaxavq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmaxavq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxav.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.maxav.v4i32(i32 %a, <4 x i32> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc float @test_vminnmvq_f16(float %a.coerce, <8 x half> %b) {
; CHECK-LABEL: test_vminnmvq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vminnmv.f16 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = tail call half @llvm.arm.mve.minnmv.f16.v8f16(half %1, <8 x half> %b)
  %3 = bitcast half %2 to i16
  %tmp2.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %4
}

define arm_aapcs_vfpcc float @test_vminnmvq_f32(float %a, <4 x float> %b) {
; CHECK-LABEL: test_vminnmvq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vminnmv.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call float @llvm.arm.mve.minnmv.f32.v4f32(float %a, <4 x float> %b)
  ret float %0
}

define arm_aapcs_vfpcc float @test_vminnmavq_f16(float %a.coerce, <8 x half> %b) {
; CHECK-LABEL: test_vminnmavq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vminnmav.f16 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = tail call half @llvm.arm.mve.minnmav.f16.v8f16(half %1, <8 x half> %b)
  %3 = bitcast half %2 to i16
  %tmp2.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %4
}

define arm_aapcs_vfpcc float @test_vminnmavq_f32(float %a, <4 x float> %b) {
; CHECK-LABEL: test_vminnmavq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vminnmav.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call float @llvm.arm.mve.minnmav.f32.v4f32(float %a, <4 x float> %b)
  ret float %0
}

define arm_aapcs_vfpcc float @test_vmaxnmvq_f16(float %a.coerce, <8 x half> %b) {
; CHECK-LABEL: test_vmaxnmvq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmaxnmv.f16 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = tail call half @llvm.arm.mve.maxnmv.f16.v8f16(half %1, <8 x half> %b)
  %3 = bitcast half %2 to i16
  %tmp2.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %4
}

define arm_aapcs_vfpcc float @test_vmaxnmvq_f32(float %a, <4 x float> %b) {
; CHECK-LABEL: test_vmaxnmvq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmaxnmv.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call float @llvm.arm.mve.maxnmv.f32.v4f32(float %a, <4 x float> %b)
  ret float %0
}

define arm_aapcs_vfpcc float @test_vmaxnmavq_f16(float %a.coerce, <8 x half> %b) {
; CHECK-LABEL: test_vmaxnmavq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmaxnmav.f16 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = tail call half @llvm.arm.mve.maxnmav.f16.v8f16(half %1, <8 x half> %b)
  %3 = bitcast half %2 to i16
  %tmp2.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %4
}

define arm_aapcs_vfpcc float @test_vmaxnmavq_f32(float %a, <4 x float> %b) {
; CHECK-LABEL: test_vmaxnmavq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vmaxnmav.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call float @llvm.arm.mve.maxnmav.f32.v4f32(float %a, <4 x float> %b)
  ret float %0
}

define arm_aapcs_vfpcc signext i8 @test_vminvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminvq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminvt.s8 r0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
  %4 = trunc i32 %3 to i8
  ret i8 %4
}

define arm_aapcs_vfpcc signext i16 @test_vminvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminvq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminvt.s16 r0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
  %4 = trunc i32 %3 to i16
  ret i16 %4
}

define arm_aapcs_vfpcc i32 @test_vminvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminvt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc zeroext i8 @test_vminvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminvq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminvt.u8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
  %4 = trunc i32 %3 to i8
  ret i8 %4
}

define arm_aapcs_vfpcc zeroext i16 @test_vminvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminvq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminvt.u16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
  %4 = trunc i32 %3 to i16
  ret i16 %4
}

define arm_aapcs_vfpcc i32 @test_vminvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc signext i8 @test_vmaxvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxvq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxvt.s8 r0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
  %4 = trunc i32 %3 to i8
  ret i8 %4
}

define arm_aapcs_vfpcc signext i16 @test_vmaxvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxvq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxvt.s16 r0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
  %4 = trunc i32 %3 to i16
  ret i16 %4
}

define arm_aapcs_vfpcc i32 @test_vmaxvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxvt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxvq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxvt.u8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
  %4 = trunc i32 %3 to i8
  ret i8 %4
}

define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxvq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxvt.u16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
  %4 = trunc i32 %3 to i16
  ret i16 %4
}

define arm_aapcs_vfpcc i32 @test_vmaxvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc zeroext i8 @test_vminavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminavq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminavt.s8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
  %4 = trunc i32 %3 to i8
  ret i8 %4
}

define arm_aapcs_vfpcc zeroext i16 @test_vminavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminavq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminavt.s16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
  %4 = trunc i32 %3 to i16
  ret i16 %4
}

define arm_aapcs_vfpcc i32 @test_vminavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminavq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminavt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxavq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxavt.s8 r0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i8 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
  %4 = trunc i32 %3 to i8
  ret i8 %4
}

define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxavq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxavt.s16 r0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %a to i32
  %1 = zext i16 %p to i32
  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = tail call i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
  %4 = trunc i32 %3 to i16
  ret i16 %4
}

define arm_aapcs_vfpcc i32 @test_vmaxavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxavq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxavt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc float @test_vminnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminnmvq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmvt.f16 r1, q1
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = zext i16 %p to i32
  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  %4 = tail call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
  %5 = bitcast half %4 to i16
  %tmp2.0.insert.ext = zext i16 %5 to i32
  %6 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %6
}

define arm_aapcs_vfpcc float @test_vminnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminnmvq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmvt.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
  ret float %2
}

define arm_aapcs_vfpcc float @test_vminnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminnmavq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmavt.f16 r1, q1
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = zext i16 %p to i32
  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  %4 = tail call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
  %5 = bitcast half %4 to i16
  %tmp2.0.insert.ext = zext i16 %5 to i32
  %6 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %6
}

define arm_aapcs_vfpcc float @test_vminnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vminnmavq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmavt.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
  ret float %2
}

define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxnmvq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmvt.f16 r1, q1
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = zext i16 %p to i32
  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  %4 = tail call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
  %5 = bitcast half %4 to i16
  %tmp2.0.insert.ext = zext i16 %5 to i32
  %6 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %6
}

define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxnmvq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmvt.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
  ret float %2
}

define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxnmavq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmavt.f16 r1, q1
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    vmov.f16 r0, s0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = zext i16 %p to i32
  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  %4 = tail call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
  %5 = bitcast half %4 to i16
  %tmp2.0.insert.ext = zext i16 %5 to i32
  %6 = bitcast i32 %tmp2.0.insert.ext to float
  ret float %6
}

define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmaxnmavq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmavt.f32 r0, q1
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
  ret float %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare i32 @llvm.arm.mve.minv.v16i8(i32, <16 x i8>, i32)
declare i32 @llvm.arm.mve.minv.v8i16(i32, <8 x i16>, i32)
declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32)
declare i32 @llvm.arm.mve.maxv.v16i8(i32, <16 x i8>, i32)
declare i32 @llvm.arm.mve.maxv.v8i16(i32, <8 x i16>, i32)
declare i32 @llvm.arm.mve.maxv.v4i32(i32, <4 x i32>, i32)
declare i32 @llvm.arm.mve.minav.v16i8(i32, <16 x i8>)
declare i32 @llvm.arm.mve.minav.v8i16(i32, <8 x i16>)
declare i32 @llvm.arm.mve.minav.v4i32(i32, <4 x i32>)
declare i32 @llvm.arm.mve.maxav.v16i8(i32, <16 x i8>)
declare i32 @llvm.arm.mve.maxav.v8i16(i32, <8 x i16>)
declare i32 @llvm.arm.mve.maxav.v4i32(i32, <4 x i32>)
declare i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
declare i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
declare i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
declare i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
declare i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
declare i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
declare i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
declare i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
declare i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)
declare i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
declare i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
declare i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)

declare half @llvm.arm.mve.minnmv.f16.v8f16(half, <8 x half>)
declare half @llvm.arm.mve.minnmav.f16.v8f16(half, <8 x half>)
declare half @llvm.arm.mve.maxnmv.f16.v8f16(half, <8 x half>)
declare half @llvm.arm.mve.maxnmav.f16.v8f16(half, <8 x half>)
declare half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
declare half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
declare half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
declare half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)

declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>)
declare float @llvm.arm.mve.minnmav.f32.v4f32(float, <4 x float>)
declare float @llvm.arm.mve.maxnmv.f32.v4f32(float, <4 x float>)
declare float @llvm.arm.mve.maxnmav.f32.v4f32(float, <4 x float>)
declare float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
declare float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
declare float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
declare float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)