; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Code generation tests for the @llvm.arm.mve.vqdmlad.* intrinsics and their
; predicated forms, for <16 x i8>, <8 x i16> and <4 x i32> vectors.
;
; Every test passes three vector operands (%inactive, %a, %b) followed by
; three i32 immediates. Across the tests in this file the immediates line up
; with the expected mnemonic as follows:
;
;   1st immediate = 1  <->  "x" suffixed form     (vq*dml*dhx)
;   2nd immediate = 1  <->  "r" form              (vqrdml*dh*)
;   3rd immediate = 1  <->  "mlsdh" form          (vq*dmlsdh*), otherwise "mladh"
;
; Each function is expected to lower to exactly one such instruction on
; q0, q1, q2 followed by the return.

;------------------------------------------------------------------------------
; Unpredicated forms.
;------------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmladhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmladhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmladhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmladhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmladhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmladhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmlsdhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmlsdhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmlsdhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmlsdhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmlsdhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmlsdhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmladhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmladhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmladhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmladhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmladhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmladhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmlsdhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmlsdhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmlsdhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmlsdhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmlsdhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmlsdhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1)
  ret <4 x i32> %0
}

;------------------------------------------------------------------------------
; Predicated ("_m") forms.
;
; The i16 predicate %p is zero-extended to i32, converted to a vector of i1
; with @llvm.arm.mve.pred.i2v.*, and passed as the trailing operand of the
; @llvm.arm.mve.vqdmlad.predicated.* intrinsic. The expected output moves the
; predicate into p0 (vmsr), opens a VPT block (vpst) and emits the "t"
; suffixed instruction.
;------------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

;------------------------------------------------------------------------------
; Intrinsic declarations used by the tests above.
;------------------------------------------------------------------------------

; i32 -> predicate vector conversions.
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; Unpredicated and predicated vqdmlad intrinsics, one per element width.
declare <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, i32, i32, i32)
declare <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i32, i32, i32)
declare <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>, i32, i32, i32)
declare <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i8>, i32, i32, i32, <16 x i1>)
declare <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i16>, i32, i32, i32, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, i32, i32, <4 x i1>)