; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck --check-prefix=LE %s
; RUN: llc -mtriple=thumbebv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck --check-prefix=BE %s

; Tests that the MVE lane-widening moves are selected from plain IR:
; a shufflevector picking the even-numbered ("bottom", vmovlb) or
; odd-numbered ("top", vmovlt) lanes, followed by sext (.s) or zext (.u).
; Big-endian (BE) runs additionally check the vrev64 lane fixups around
; each instruction.

; --- vmovlbq: extend the even-numbered (bottom) lanes ---

define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_s8(<16 x i8> %a) {
; LE-LABEL: test_vmovlbq_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.s8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlb.s8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %1 = sext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_s16(<8 x i16> %a) {
; LE-LABEL: test_vmovlbq_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.s16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlb.s16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %1 = sext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_u8(<16 x i8> %a) {
; LE-LABEL: test_vmovlbq_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.u8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlb.u8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %1 = zext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_u16(<8 x i16> %a) {
; LE-LABEL: test_vmovlbq_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.u16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlb.u16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %1 = zext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}

; --- vmovltq: extend the odd-numbered (top) lanes ---

define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_s8(<16 x i8> %a) {
; LE-LABEL: test_vmovltq_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.s8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlt.s8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %1 = sext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_s16(<8 x i16> %a) {
; LE-LABEL: test_vmovltq_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.s16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlt.s16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %1 = sext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_u8(<16 x i8> %a) {
; LE-LABEL: test_vmovltq_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.u8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlt.u8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %1 = zext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_u16(<8 x i16> %a) {
; LE-LABEL: test_vmovltq_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.u16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlt.u16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %1 = zext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}

; --- Predicated (_m) variants via @llvm.arm.mve.vmovl.predicated ---
; The two i32 immediates select the instruction: the first picks zext (1)
; vs sext (0), the second picks top (1) vs bottom (0) lanes; %p is the
; predicate mask and %inactive supplies lanes where the predicate is false
; (vmovlbt/vmovltt under a vpst block).

define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.s8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.s8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.s16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.s16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.u8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.u8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.u16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.u16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.s8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.s8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.s16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.s16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.u8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.u8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.u16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.u16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>)
declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>)