; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Every function below has the same shape: a lane mask %c is computed by
; comparing %z against zero, an operation is applied to %x and %y, and the
; result is selected over %z under %c. The expected assembly in each case is a
; VPT compare of q0 against zr followed by a single predicated instruction
; that writes q0.

; --- Integer add / sub / mul, vector-vector form ---

define arm_aapcs_vfpcc <4 x i32> @add_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vaddt.i32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = add <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @add_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: add_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vaddt.i16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = add <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @add_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: add_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vaddt.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = add <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @sub_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sub_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vsubt.i32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sub_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: sub_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vsubt.i16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sub_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: sub_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vsubt.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @mul_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmult.i32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = mul <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mul_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmult.i16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = mul <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mul_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmult.i8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = mul <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; --- Bitwise and / or / xor (the predicated instruction carries no size suffix) ---

define arm_aapcs_vfpcc <4 x i32> @and_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: and_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = and <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @and_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: and_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = and <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @and_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: and_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = and <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @or_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: or_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = or <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @or_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: or_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = or <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @or_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: or_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vorrt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = or <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @xor_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    veort q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = xor <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @xor_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    veort q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = xor <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @xor_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    veort q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = xor <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; --- and-not / or-not: %y is inverted with an xor against all-ones first ---

define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: andnot_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: andnot_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: andnot_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vbict q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ornot_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vornt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: ornot_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vornt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: ornot_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vornt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; --- Floating-point add / sub / mul (mask comes from fcmp oeq against zero) ---

define arm_aapcs_vfpcc <4 x float> @fadd_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vaddt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fadd_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vaddt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fsub_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fsub_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vsubt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsub_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fsub_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vsubt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fmul_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fmul_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmult.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmul_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fmul_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmult.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; --- Integer min / max written as icmp + select ---

define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_slt_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmint.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp slt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_slt_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmint.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp slt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_slt_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmint.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp slt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_sgt_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmaxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp sgt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_sgt_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmaxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp sgt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_sgt_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmaxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp sgt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_ult_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmint.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp ult <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_ult_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmint.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp ult <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_ult_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmint.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp ult <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: icmp_ugt_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmaxt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a1 = icmp ugt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: icmp_ugt_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmaxt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a1 = icmp ugt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: icmp_ugt_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmaxt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a1 = icmp ugt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; --- Floating-point min / max written as fast fcmp + select ---

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vminnmt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vminnmt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32(<4 x float> %z, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmaxnmt.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmaxnmt.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

; --- Saturating add / sub via the llvm.{s,u}{add,sub}.sat intrinsics ---

define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sadd_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: sadd_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: sadd_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: uadd_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqaddt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: uadd_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqaddt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: uadd_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqaddt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ssub_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: ssub_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: ssub_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32(<4 x i32> %z, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: usub_sat_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vqsubt.u32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16(<8 x i16> %z, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: usub_sat_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vqsubt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8(<16 x i8> %z, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: usub_sat_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vqsubt.u8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; --- Vector-by-scalar ("qr") forms: %y is a scalar splatted with
; --- insertelement + shufflevector; the expected instruction takes r0 ---

define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: addqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vaddt.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = add <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: addqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vaddt.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = add <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: addqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vaddt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = add <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: subqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vsubt.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: subqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vsubt.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: subqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vsubt.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = sub <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: mulqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
; CHECK-NEXT:    vmult.i32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <4 x i32> %z, zeroinitializer
  %i = insertelement <4 x i32> undef, i32 %y, i32 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = mul <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) {
; CHECK-LABEL: mulqr_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i16 eq, q0, zr
; CHECK-NEXT:    vmult.i16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <8 x i16> %z, zeroinitializer
  %i = insertelement <8 x i16> undef, i16 %y, i32 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = mul <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) {
; CHECK-LABEL: mulqr_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i8 eq, q0, zr
; CHECK-NEXT:    vmult.i8 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = icmp eq <16 x i8> %z, zeroinitializer
  %i = insertelement <16 x i8> undef, i8 %y, i32 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = mul <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z
  ret <16 x i8> %b
}

; --- Floating-point vector-by-scalar forms: the expected code first moves the
; --- scalar from s8 into r0 with a vmov ---

define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: faddqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vaddt.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fadd <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: faddqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vaddt.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fadd <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: fsubqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vsubt.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fsub <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: fsubqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vsubt.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fsub <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32(<4 x float> %z, <4 x float> %x, float %y) {
; CHECK-LABEL: fmulqr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    vpt.f32 eq, q0, zr
; CHECK-NEXT:    vmult.f32 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <4 x float> %z, zeroinitializer
  %i = insertelement <4 x float> undef, float %y, i32 0
  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
  %a = fmul <4 x float> %x, %ys
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %z
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16(<8 x half> %z, <8 x half> %x, half %y) {
; CHECK-LABEL: fmulqr_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f16 r0, s8
; CHECK-NEXT:    vpt.f16 eq, q0, zr
; CHECK-NEXT:    vmult.f16 q0, q1, r0
; CHECK-NEXT:    bx lr
entry:
  %c = fcmp oeq <8 x half> %z, zeroinitializer
  %i = insertelement <8 x half> undef, half %y, i32 0
  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
  %a = fmul <8 x half> %x, %ys
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %z
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) {
; CHECK-LABEL: sadd_satqr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpt.i32 eq, q0, zr
1015; CHECK-NEXT: vqaddt.s32 q0, q1, r0 1016; CHECK-NEXT: bx lr 1017entry: 1018 %c = icmp eq <4 x i32> %z, zeroinitializer 1019 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1020 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1021 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1022 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z 1023 ret <4 x i32> %b 1024} 1025 1026define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) { 1027; CHECK-LABEL: sadd_satqr_v8i16: 1028; CHECK: @ %bb.0: @ %entry 1029; CHECK-NEXT: vpt.i16 eq, q0, zr 1030; CHECK-NEXT: vqaddt.s16 q0, q1, r0 1031; CHECK-NEXT: bx lr 1032entry: 1033 %c = icmp eq <8 x i16> %z, zeroinitializer 1034 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1035 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1036 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1037 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z 1038 ret <8 x i16> %b 1039} 1040 1041define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) { 1042; CHECK-LABEL: sadd_satqr_v16i8: 1043; CHECK: @ %bb.0: @ %entry 1044; CHECK-NEXT: vpt.i8 eq, q0, zr 1045; CHECK-NEXT: vqaddt.s8 q0, q1, r0 1046; CHECK-NEXT: bx lr 1047entry: 1048 %c = icmp eq <16 x i8> %z, zeroinitializer 1049 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1050 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1051 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1052 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z 1053 ret <16 x i8> %b 1054} 1055 1056define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) { 1057; CHECK-LABEL: uadd_satqr_v4i32: 1058; CHECK: @ %bb.0: @ %entry 1059; CHECK-NEXT: vpt.i32 eq, q0, zr 1060; CHECK-NEXT: vqaddt.u32 q0, q1, r0 1061; CHECK-NEXT: bx lr 1062entry: 1063 %c = icmp eq <4 x i32> %z, zeroinitializer 1064 %i = 
insertelement <4 x i32> undef, i32 %y, i32 0 1065 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1066 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1067 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z 1068 ret <4 x i32> %b 1069} 1070 1071define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) { 1072; CHECK-LABEL: uadd_satqr_v8i16: 1073; CHECK: @ %bb.0: @ %entry 1074; CHECK-NEXT: vpt.i16 eq, q0, zr 1075; CHECK-NEXT: vqaddt.u16 q0, q1, r0 1076; CHECK-NEXT: bx lr 1077entry: 1078 %c = icmp eq <8 x i16> %z, zeroinitializer 1079 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1080 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1081 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1082 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z 1083 ret <8 x i16> %b 1084} 1085 1086define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) { 1087; CHECK-LABEL: uadd_satqr_v16i8: 1088; CHECK: @ %bb.0: @ %entry 1089; CHECK-NEXT: vpt.i8 eq, q0, zr 1090; CHECK-NEXT: vqaddt.u8 q0, q1, r0 1091; CHECK-NEXT: bx lr 1092entry: 1093 %c = icmp eq <16 x i8> %z, zeroinitializer 1094 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1095 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1096 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1097 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z 1098 ret <16 x i8> %b 1099} 1100 1101define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) { 1102; CHECK-LABEL: ssub_satqr_v4i32: 1103; CHECK: @ %bb.0: @ %entry 1104; CHECK-NEXT: vpt.i32 eq, q0, zr 1105; CHECK-NEXT: vqsubt.s32 q0, q1, r0 1106; CHECK-NEXT: bx lr 1107entry: 1108 %c = icmp eq <4 x i32> %z, zeroinitializer 1109 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1110 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1111 
%a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1112 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z 1113 ret <4 x i32> %b 1114} 1115 1116define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) { 1117; CHECK-LABEL: ssub_satqr_v8i16: 1118; CHECK: @ %bb.0: @ %entry 1119; CHECK-NEXT: vpt.i16 eq, q0, zr 1120; CHECK-NEXT: vqsubt.s16 q0, q1, r0 1121; CHECK-NEXT: bx lr 1122entry: 1123 %c = icmp eq <8 x i16> %z, zeroinitializer 1124 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1125 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1126 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1127 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z 1128 ret <8 x i16> %b 1129} 1130 1131define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) { 1132; CHECK-LABEL: ssub_satqr_v16i8: 1133; CHECK: @ %bb.0: @ %entry 1134; CHECK-NEXT: vpt.i8 eq, q0, zr 1135; CHECK-NEXT: vqsubt.s8 q0, q1, r0 1136; CHECK-NEXT: bx lr 1137entry: 1138 %c = icmp eq <16 x i8> %z, zeroinitializer 1139 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1140 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1141 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1142 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z 1143 ret <16 x i8> %b 1144} 1145 1146define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32(<4 x i32> %z, <4 x i32> %x, i32 %y) { 1147; CHECK-LABEL: usub_satqr_v4i32: 1148; CHECK: @ %bb.0: @ %entry 1149; CHECK-NEXT: vpt.i32 eq, q0, zr 1150; CHECK-NEXT: vqsubt.u32 q0, q1, r0 1151; CHECK-NEXT: bx lr 1152entry: 1153 %c = icmp eq <4 x i32> %z, zeroinitializer 1154 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1155 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1156 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1157 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %z 1158 
ret <4 x i32> %b 1159} 1160 1161define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16(<8 x i16> %z, <8 x i16> %x, i16 %y) { 1162; CHECK-LABEL: usub_satqr_v8i16: 1163; CHECK: @ %bb.0: @ %entry 1164; CHECK-NEXT: vpt.i16 eq, q0, zr 1165; CHECK-NEXT: vqsubt.u16 q0, q1, r0 1166; CHECK-NEXT: bx lr 1167entry: 1168 %c = icmp eq <8 x i16> %z, zeroinitializer 1169 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1170 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1171 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1172 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %z 1173 ret <8 x i16> %b 1174} 1175 1176define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8(<16 x i8> %z, <16 x i8> %x, i8 %y) { 1177; CHECK-LABEL: usub_satqr_v16i8: 1178; CHECK: @ %bb.0: @ %entry 1179; CHECK-NEXT: vpt.i8 eq, q0, zr 1180; CHECK-NEXT: vqsubt.u8 q0, q1, r0 1181; CHECK-NEXT: bx lr 1182entry: 1183 %c = icmp eq <16 x i8> %z, zeroinitializer 1184 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1185 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1186 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1187 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %z 1188 ret <16 x i8> %b 1189} 1190 1191declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) 1192declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) 1193declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) 1194declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) 1195declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) 1196declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) 1197declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) 1198declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) 1199declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) 1200declare <16 x i8> 
@llvm.usub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) 1201declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) 1202declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) 1203