; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi --verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s

; Every function below builds a lane mask with `icmp sge %a, %c` and passes
; either that mask or its inversion (xor with all-true) to a chain of
; predicated ORR intrinsic calls. The expected assembly for each function is a
; single vpt instruction followed by the predicated instructions: the letters
; after "vpt" in the expected mnemonic line up with the per-call masks, 't'
; where a call uses the compare result and 'e' where it uses the inverted one.
; In the expected output, calls whose two data operands are both %c show up as
; predicated vmov rather than vorr.
; NOTE(review): the fourth intrinsic operand is presumably the value used for
; inactive lanes - confirm against the intrinsic definition.

declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)

; One call on the compare mask; expected mnemonic is plain vpt.
define arm_aapcs_vfpcc <4 x i32> @vpt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpt_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ret <4 x i32> %1
}

; Two calls on the compare mask. The second call takes the first call's result
; as its first data operand while both pass %a as the fourth operand; the
; expected output copies q0 into q3 before the block.
define arm_aapcs_vfpcc <4 x i32> @vptt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptt_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3, q0
; CHECK-NEXT:    vptt.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q3, q1, q2
; CHECK-NEXT:    vorrt q0, q3, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %1, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  ret <4 x i32> %2
}

; Three calls on the compare mask, each passing the previous result as the
; fourth operand.
define arm_aapcs_vfpcc <4 x i32> @vpttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttt_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpttt.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
  ret <4 x i32> %3
}

; Four calls on the compare mask, each passing the previous result as the
; fourth operand.
define arm_aapcs_vfpcc <4 x i32> @vptttt_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptttt_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vptttt.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
  ret <4 x i32> %4
}


; Mask sequence: compare, inverted.
define arm_aapcs_vfpcc <4 x i32> @vpte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpte_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpte.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
  ret <4 x i32> %3
}

; Mask sequence: compare, compare, inverted. All three calls OR %b with %c,
; and the inverted mask is computed before the first call.
define arm_aapcs_vfpcc <4 x i32> @vptte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptte_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vptte.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorre q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %2)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
  ret <4 x i32> %4
}

; Mask sequence: compare, inverted, inverted. All three calls OR %b with %c,
; and the inverted mask is computed before the first call.
define arm_aapcs_vfpcc <4 x i32> @vptee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptee_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vptee.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vorre q0, q1, q2
; CHECK-NEXT:    vorre q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %2)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %1, <4 x i32> %3)
  ret <4 x i32> %4
}

; Mask sequence: compare, inverted, compare.
define arm_aapcs_vfpcc <4 x i32> @vptet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptet_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vptet.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
  ret <4 x i32> %4
}

; Mask sequence: compare, compare, inverted, compare.
define arm_aapcs_vfpcc <4 x i32> @vpttet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttet_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpttet.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
  ret <4 x i32> %5
}

; Mask sequence: compare, inverted, compare, compare.
define arm_aapcs_vfpcc <4 x i32> @vptett_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptett_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vptett.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
  ret <4 x i32> %5
}

; Mask sequence: compare, inverted, inverted, compare.
define arm_aapcs_vfpcc <4 x i32> @vpteet_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpteet_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpteet.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %4)
  ret <4 x i32> %5
}

; Mask sequence: compare, inverted, inverted, inverted.
define arm_aapcs_vfpcc <4 x i32> @vpteee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpteee_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpteee.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
  ret <4 x i32> %5
}

; Mask sequence: compare, inverted, compare, inverted.
define arm_aapcs_vfpcc <4 x i32> @vptete_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vptete_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vptete.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
  ret <4 x i32> %5
}

; Mask sequence: compare, compare, compare, inverted.
define arm_aapcs_vfpcc <4 x i32> @vpttte_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttte_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpttte.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
  ret <4 x i32> %5
}

; Mask sequence: compare, compare, inverted, inverted.
define arm_aapcs_vfpcc <4 x i32> @vpttee_block(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: vpttee_block:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpttee.s32 ge, q0, q2
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    vmovt q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    vmove q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = icmp sge <4 x i32> %a, %c
  %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %b, <4 x i32> %c, <4 x i1> %0, <4 x i32> %a)
  %2 = xor <4 x i1> %0, <i1 true, i1 true, i1 true, i1 true>
  %3 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %0, <4 x i32> %1)
  %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %3)
  %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %c, <4 x i32> %c, <4 x i1> %2, <4 x i32> %4)
  ret <4 x i32> %5
}