; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -instcombine -mtriple=thumbv8.1m.main %s | llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - | FileCheck %s

; Declarations of the ARM MVE VCTP intrinsics exercised below, one per
; element size. NOTE(review): vctp64 is declared here as returning <4 x i1>
; (matching the v2i/i2v.v4i1 conversions used in test_vctp64q*) -- confirm
; this still matches the intrinsic definition if the test is regenerated.
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i1> @llvm.arm.mve.vctp64(i32)

; Predicate-vector -> i32 conversions (codegen to a VMRS from p0).
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)

; i32 -> predicate-vector conversions (codegen to a VMSR into p0).
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

; vctp8 followed by a v2i conversion: expect a bare VCTP.8 plus VMRS.
define arm_aapcs_vfpcc zeroext i16 @test_vctp8q(i32 %a) {
; CHECK-LABEL: test_vctp8q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

; Predicated (_m) form: ANDing the vctp result with an incoming predicate
; should select the predicated VCTPT.8 inside a VPST block.
define arm_aapcs_vfpcc zeroext i16 @test_vctp8q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp8q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.8 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %a)
  %3 = and <16 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

; Same pattern as test_vctp8q, for 16-bit elements.
define arm_aapcs_vfpcc zeroext i16 @test_vctp16q(i32 %a) {
; CHECK-LABEL: test_vctp16q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

; Predicated vctp16: expect VMSR + VPST + VCTPT.16 + VMRS.
define arm_aapcs_vfpcc zeroext i16 @test_vctp16q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp16q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.16 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %a)
  %3 = and <8 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp32q(i32 %a) {
; CHECK-LABEL: test_vctp32q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp32q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp32q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.32 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

; vctp64 uses the <4 x i1> predicate form (see the declaration note above
; the declare block in this file's header).
define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
; CHECK-LABEL: test_vctp64q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.64 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: test_vctp64q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vctpt.64 r0
; CHECK-NEXT:    vmrs r0, p0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

; vpsel tests: an i16 predicate is moved into p0 with VMSR and a select on
; the corresponding <N x i1> vector codegens to VPSEL.
; NOTE(review): attribute group #2 is referenced but not defined in the
; visible portion of this file -- confirm it is defined (or intentionally
; stripped) before running this test standalone.
define arm_aapcs_vfpcc <16 x i8> @test_vpselq_i8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vpselq_i16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vpselq_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = select <8 x i1> %1, <8 x half> %a, <8 x half> %b
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vpselq_i32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vpselq_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = select <4 x i1> %1, <4 x float> %a, <4 x float> %b
  ret <4 x float> %2
}

; i64 vpsel has no native predicate width; the test bitcasts to <4 x i32>,
; selects with the <4 x i1> predicate, and bitcasts back.
define arm_aapcs_vfpcc <2 x i64> @test_vpselq_i64(<2 x i64> %a, <2 x i64> %b, i16 zeroext %p) #2 {
; CHECK-LABEL: test_vpselq_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = bitcast <2 x i64> %a to <4 x i32>
  %3 = bitcast <2 x i64> %b to <4 x i32>
  %4 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %3
  %5 = bitcast <4 x i32> %4 to <2 x i64>
  ret <2 x i64> %5
}