; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP

; Two llc configurations are exercised. The first RUN line enables +mve.fp and
; its output is matched with the FP-suffixed prefix; the second enables only
; +mve together with +fullfp16 and is matched with the NOFP-suffixed prefix.
; Output that is identical in both runs is matched with the common prefix.

; fmin reductions with the `fast` flag on the intrinsic call.

define arm_aapcs_vfpcc float @fmin_v2f32(<2 x float> %x) {
; CHECK-LABEL: fmin_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc float @fmin_v4f32(<4 x float> %x) {
; CHECK-FP-LABEL: fmin_v4f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s3
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  ret float %z
}

; FIXME: fminnum (vector) -> fminnum (scalar)? In the +mve-only run the
; expected output of fmin_v8f32 below combines the two input halves with
; vcmp/vselgt pairs rather than with vminnm.
define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
; CHECK-FP-LABEL: fmin_v8f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vcmp.f32 s5, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f32 s8, s1, s5
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f32 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f32 s12, s2, s6
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s2, s12
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s2, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) {
; CHECK-FP-LABEL: fmin_v4f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc half @fmin_v8f16(<8 x half> %x) {
; CHECK-FP-LABEL: fmin_v8f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
; CHECK-FP-LABEL: fmin_v16f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s5, s1
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc double @fmin_v1f64(<1 x double> %x) {
; CHECK-LABEL: fmin_v1f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  ret double %z
}

define arm_aapcs_vfpcc double @fmin_v2f64(<2 x double> %x) {
; CHECK-LABEL: fmin_v2f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  ret double %z
}

define arm_aapcs_vfpcc double @fmin_v4f64(<4 x double> %x) {
; CHECK-LABEL: fmin_v4f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmp.f64 d3, d1
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vcmp.f64 d2, d0
; CHECK-NEXT:    vselgt.f64 d4, d1, d3
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d4
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  ret double %z
}

; The same fmin reductions without the `fast` flag on the intrinsic call.

define arm_aapcs_vfpcc float @fmin_v2f32_nofast(<2 x float> %x) {
; CHECK-LABEL: fmin_v2f32_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc float @fmin_v4f32_nofast(<4 x float> %x) {
; CHECK-FP-LABEL: fmin_v4f32_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s3
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc float @fmin_v8f32_nofast(<8 x float> %x) {
; CHECK-FP-LABEL: fmin_v8f32_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f32 s8, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f32 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f32 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
; CHECK-FP-LABEL: fmin_v4f16_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
; CHECK-FP-LABEL: fmin_v8f16_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
; CHECK-FP-LABEL: fmin_v16f16_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc double @fmin_v1f64_nofast(<1 x double> %x) {
; CHECK-LABEL: fmin_v1f64_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  ret double %z
}

define arm_aapcs_vfpcc double @fmin_v2f64_nofast(<2 x double> %x) {
; CHECK-LABEL: fmin_v2f64_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  ret double %z
}

define arm_aapcs_vfpcc double @fmin_v4f64_nofast(<4 x double> %x) {
; CHECK-LABEL: fmin_v4f64_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d4, d1, d3
; CHECK-NEXT:    vminnm.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d4
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  ret double %z
}

; fmin reductions whose result is combined with an extra scalar %y through
; `fcmp fast olt` + `select`. The reduction call carries the `fast` flag too.

define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
; CHECK-LABEL: fmin_v2f32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    vminnm.f32 s0, s4, s0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  %c = fcmp fast olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v4f32_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s6, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f32 s0, s4, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s6, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s6, s3
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  %c = fcmp fast olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v8f32_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f32 s0, s8, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vcmp.f32 s5, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f32 s10, s1, s5
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f32 s12, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f32 s14, s2, s6
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s12, s10
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s2, s14
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s2, s0
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  %c = fcmp fast olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; The half variants load %y from %yy and store the selected value back to %yy.

define arm_aapcs_vfpcc void @fmin_v4f16_acc(<4 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v4f16_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc void @fmin_v2f16_acc(<2 x half> %x, half* %yy) {
; CHECK-LABEL: fmin_v2f16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s0
; CHECK-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NEXT:    vstr.16 s0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc void @fmin_v8f16_acc(<8 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v8f16_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc void @fmin_v16f16_acc(<16 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v16f16_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s5, s1
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
; CHECK-LABEL: fmin_v1f64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  %c = fcmp fast olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
; CHECK-LABEL: fmin_v2f64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    vminnm.f64 d0, d2, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  %c = fcmp fast olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
; CHECK-LABEL: fmin_v4f64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmp.f64 d3, d1
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vcmp.f64 d2, d0
; CHECK-NEXT:    vselgt.f64 d5, d1, d3
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d5
; CHECK-NEXT:    vminnm.f64 d0, d4, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  %c = fcmp fast olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

; Same accumulate pattern, but neither the reduction call nor the fcmp carries
; fast-math flags; the expected output ends in vcmp/vmrs/vselgt instead of a
; final vminnm.

define arm_aapcs_vfpcc float @fmin_v2f32_acc_nofast(<2 x float> %x, float %y) {
; CHECK-LABEL: fmin_v2f32_acc_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    vcmp.f32 s0, s4
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f32 s0, s4, s0
; CHECK-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  %c = fcmp olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v4f32_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s6, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s6
; CHECK-FP-NEXT:    vcmp.f32 s0, s4
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f32 s0, s4, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s6, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s6, s3
; CHECK-NOFP-NEXT:    vcmp.f32 s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  %c = fcmp olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v8f32_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f32 s0, s8
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f32 s0, s8, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s12, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s12, s10
; CHECK-NOFP-NEXT:    vminnm.f32 s12, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s10, s12
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s10, s0
; CHECK-NOFP-NEXT:    vcmp.f32 s0, s8
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  %c = fcmp olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

define arm_aapcs_vfpcc void @fmin_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v4f16_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f16 s0, s2
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  %c = fcmp olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc void @fmin_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v8f16_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f16 s0, s2
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  %c = fcmp olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc void @fmin_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v16f16_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f16 s0, s2
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  %c = fcmp olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

define arm_aapcs_vfpcc double @fmin_v1f64_acc_nofast(<1 x double> %x, double %y) {
; CHECK-LABEL: fmin_v1f64_acc_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmp.f64 d0, d1
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  %c = fcmp olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

define arm_aapcs_vfpcc double @fmin_v2f64_acc_nofast(<2 x double> %x, double %y) {
; CHECK-LABEL: fmin_v2f64_acc_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    vcmp.f64 d0, d2
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d2, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  %c = fcmp olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

define arm_aapcs_vfpcc double @fmin_v4f64_acc_nofast(<4 x double> %x, double %y) {
; CHECK-LABEL: fmin_v4f64_acc_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d5, d1, d3
; CHECK-NEXT:    vminnm.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d5
; CHECK-NEXT:    vcmp.f64 d0, d4
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d4, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  %c = fcmp olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

; fmax reductions with the `fast` flag on the intrinsic call.

define arm_aapcs_vfpcc float @fmax_v2f32(<2 x float> %x) {
; CHECK-LABEL: fmax_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
; CHECK-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc float @fmax_v4f32(<4 x float> %x) {
; CHECK-FP-LABEL: fmax_v4f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmax_v4f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmaxnm.f32 s4, s0, s1
; CHECK-NOFP-NEXT:    vmaxnm.f32 s4, s4, s2
; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s3
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc float @fmax_v8f32(<8 x float> %x) {
; CHECK-FP-LABEL: fmax_v8f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmaxnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmax_v8f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vcmp.f32 s1, s5
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s0, s4
; CHECK-NOFP-NEXT:    vselgt.f32 s8, s1, s5
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s2, s6
; CHECK-NOFP-NEXT:    vselgt.f32 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s3, s7
; CHECK-NOFP-NEXT:    vselgt.f32 s12, s2, s6
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vmaxnm.f32 s2, s10, s8
; CHECK-NOFP-NEXT:    vmaxnm.f32 s2, s2, s12
; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s2, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
  ret float %z
}

define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) {
; CHECK-FP-LABEL: fmax_v4f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmax_v4f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
  ret half %z
}

define arm_aapcs_vfpcc half @fmax_v8f16(<8 x half> %x) {
; CHECK-FP-LABEL: fmax_v8f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmax_v8f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
  ret half %z
}

1033define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) { 1034; CHECK-FP-LABEL: fmax_v16f16: 1035; CHECK-FP: @ %bb.0: @ %entry 1036; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1037; CHECK-FP-NEXT: vrev32.16 q1, q0 1038; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1039; CHECK-FP-NEXT: 
vmaxnm.f16 s4, s2, s3 1040; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1041; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1042; CHECK-FP-NEXT: bx lr 1043; 1044; CHECK-NOFP-LABEL: fmax_v16f16: 1045; CHECK-NOFP: @ %bb.0: @ %entry 1046; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 1047; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 1048; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 1049; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 1050; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1051; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 1052; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 1053; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1054; CHECK-NOFP-NEXT: vcmp.f16 s1, s5 1055; CHECK-NOFP-NEXT: vselgt.f16 s10, s0, s4 1056; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1057; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 1058; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 1059; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1060; CHECK-NOFP-NEXT: vselgt.f16 s10, s1, s5 1061; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1062; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 1063; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 1064; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1065; CHECK-NOFP-NEXT: vcmp.f16 s2, s6 1066; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 1067; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1068; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1069; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 1070; CHECK-NOFP-NEXT: vselgt.f16 s10, s2, s6 1071; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1072; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 1073; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 1074; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1075; CHECK-NOFP-NEXT: vcmp.f16 s3, s7 1076; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 1077; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1078; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 1079; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1080; CHECK-NOFP-NEXT: vselgt.f16 s10, s3, s7 1081; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1082; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1083; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 1084; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 1085; CHECK-NOFP-NEXT: bx lr 1086entry: 1087 %z = call fast half 
@llvm.vector.reduce.fmax.v16f16(<16 x half> %x) 1088 ret half %z 1089} 1090 1091define arm_aapcs_vfpcc double @fmax_v1f64(<1 x double> %x) { 1092; CHECK-LABEL: fmax_v1f64: 1093; CHECK: @ %bb.0: @ %entry 1094; CHECK-NEXT: bx lr 1095entry: 1096 %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x) 1097 ret double %z 1098} 1099 1100define arm_aapcs_vfpcc double @fmax_v2f64(<2 x double> %x) { 1101; CHECK-LABEL: fmax_v2f64: 1102; CHECK: @ %bb.0: @ %entry 1103; CHECK-NEXT: vmaxnm.f64 d0, d0, d1 1104; CHECK-NEXT: bx lr 1105entry: 1106 %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x) 1107 ret double %z 1108} 1109 1110define arm_aapcs_vfpcc double @fmax_v4f64(<4 x double> %x) { 1111; CHECK-LABEL: fmax_v4f64: 1112; CHECK: @ %bb.0: @ %entry 1113; CHECK-NEXT: vcmp.f64 d1, d3 1114; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1115; CHECK-NEXT: vcmp.f64 d0, d2 1116; CHECK-NEXT: vselgt.f64 d4, d1, d3 1117; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1118; CHECK-NEXT: vselgt.f64 d0, d0, d2 1119; CHECK-NEXT: vmaxnm.f64 d0, d0, d4 1120; CHECK-NEXT: bx lr 1121entry: 1122 %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x) 1123 ret double %z 1124} 1125 1126define arm_aapcs_vfpcc float @fmax_v2f32_nofast(<2 x float> %x) { 1127; CHECK-LABEL: fmax_v2f32_nofast: 1128; CHECK: @ %bb.0: @ %entry 1129; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 1130; CHECK-NEXT: bx lr 1131entry: 1132 %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x) 1133 ret float %z 1134} 1135 1136define arm_aapcs_vfpcc float @fmax_v4f32_nofast(<4 x float> %x) { 1137; CHECK-FP-LABEL: fmax_v4f32_nofast: 1138; CHECK-FP: @ %bb.0: @ %entry 1139; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 1140; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 1141; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 1142; CHECK-FP-NEXT: bx lr 1143; 1144; CHECK-NOFP-LABEL: fmax_v4f32_nofast: 1145; CHECK-NOFP: @ %bb.0: @ %entry 1146; CHECK-NOFP-NEXT: vmaxnm.f32 s4, s0, s1 1147; CHECK-NOFP-NEXT: vmaxnm.f32 s4, s4, s2 1148; CHECK-NOFP-NEXT: 
vmaxnm.f32 s0, s4, s3 1149; CHECK-NOFP-NEXT: bx lr 1150entry: 1151 %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x) 1152 ret float %z 1153} 1154 1155define arm_aapcs_vfpcc float @fmax_v8f32_nofast(<8 x float> %x) { 1156; CHECK-FP-LABEL: fmax_v8f32_nofast: 1157; CHECK-FP: @ %bb.0: @ %entry 1158; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 1159; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 1160; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 1161; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 1162; CHECK-FP-NEXT: bx lr 1163; 1164; CHECK-NOFP-LABEL: fmax_v8f32_nofast: 1165; CHECK-NOFP: @ %bb.0: @ %entry 1166; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s0, s4 1167; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s1, s5 1168; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s10, s8 1169; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s2, s6 1170; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s8, s10 1171; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s3, s7 1172; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0 1173; CHECK-NOFP-NEXT: bx lr 1174entry: 1175 %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x) 1176 ret float %z 1177} 1178 1179define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) { 1180; CHECK-FP-LABEL: fmax_v4f16_nofast: 1181; CHECK-FP: @ %bb.0: @ %entry 1182; CHECK-FP-NEXT: vmovx.f16 s4, s1 1183; CHECK-FP-NEXT: vmovx.f16 s6, s0 1184; CHECK-FP-NEXT: vmaxnm.f16 s4, s1, s4 1185; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6 1186; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1187; CHECK-FP-NEXT: bx lr 1188; 1189; CHECK-NOFP-LABEL: fmax_v4f16_nofast: 1190; CHECK-NOFP: @ %bb.0: @ %entry 1191; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 1192; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 1193; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 1194; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 1195; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 1196; CHECK-NOFP-NEXT: bx lr 1197entry: 1198 %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x) 1199 ret half %z 1200} 1201 1202define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) { 1203; CHECK-FP-LABEL: fmax_v8f16_nofast: 1204; CHECK-FP: @ %bb.0: @ %entry 
1205; CHECK-FP-NEXT: vrev32.16 q1, q0 1206; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1207; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 1208; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1209; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1210; CHECK-FP-NEXT: bx lr 1211; 1212; CHECK-NOFP-LABEL: fmax_v8f16_nofast: 1213; CHECK-NOFP: @ %bb.0: @ %entry 1214; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 1215; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 1216; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 1217; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1218; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 1219; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 1220; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 1221; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s2 1222; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 1223; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s3 1224; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 1225; CHECK-NOFP-NEXT: bx lr 1226entry: 1227 %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x) 1228 ret half %z 1229} 1230 1231define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) { 1232; CHECK-FP-LABEL: fmax_v16f16_nofast: 1233; CHECK-FP: @ %bb.0: @ %entry 1234; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1235; CHECK-FP-NEXT: vrev32.16 q1, q0 1236; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1237; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 1238; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1239; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1240; CHECK-FP-NEXT: bx lr 1241; 1242; CHECK-NOFP-LABEL: fmax_v16f16_nofast: 1243; CHECK-NOFP: @ %bb.0: @ %entry 1244; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 1245; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 1246; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 1247; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s0, s4 1248; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 1249; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s1, s5 1250; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1251; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 1252; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 1253; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 1254; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 1255; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 1256; CHECK-NOFP-NEXT: vmaxnm.f16 
s8, s8, s10 1257; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s2, s6 1258; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1259; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 1260; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 1261; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1262; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1263; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s3, s7 1264; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1265; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4 1266; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 1267; CHECK-NOFP-NEXT: bx lr 1268entry: 1269 %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x) 1270 ret half %z 1271} 1272 1273define arm_aapcs_vfpcc double @fmax_v1f64_nofast(<1 x double> %x) { 1274; CHECK-LABEL: fmax_v1f64_nofast: 1275; CHECK: @ %bb.0: @ %entry 1276; CHECK-NEXT: bx lr 1277entry: 1278 %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x) 1279 ret double %z 1280} 1281 1282define arm_aapcs_vfpcc double @fmax_v2f64_nofast(<2 x double> %x) { 1283; CHECK-LABEL: fmax_v2f64_nofast: 1284; CHECK: @ %bb.0: @ %entry 1285; CHECK-NEXT: vmaxnm.f64 d0, d0, d1 1286; CHECK-NEXT: bx lr 1287entry: 1288 %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x) 1289 ret double %z 1290} 1291 1292define arm_aapcs_vfpcc double @fmax_v4f64_nofast(<4 x double> %x) { 1293; CHECK-LABEL: fmax_v4f64_nofast: 1294; CHECK: @ %bb.0: @ %entry 1295; CHECK-NEXT: vmaxnm.f64 d4, d1, d3 1296; CHECK-NEXT: vmaxnm.f64 d0, d0, d2 1297; CHECK-NEXT: vmaxnm.f64 d0, d0, d4 1298; CHECK-NEXT: bx lr 1299entry: 1300 %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x) 1301 ret double %z 1302} 1303 1304define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) { 1305; CHECK-LABEL: fmax_v2f32_acc: 1306; CHECK: @ %bb.0: @ %entry 1307; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 1308; CHECK-NEXT: vmaxnm.f32 s0, s4, s0 1309; CHECK-NEXT: bx lr 1310entry: 1311 %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x) 1312 %c = fcmp fast ogt float %y, %z 1313 %r = select i1 %c, float %y, float %z 
1314 ret float %r 1315} 1316 1317define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) { 1318; CHECK-FP-LABEL: fmax_v4f32_acc: 1319; CHECK-FP: @ %bb.0: @ %entry 1320; CHECK-FP-NEXT: vmaxnm.f32 s6, s2, s3 1321; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 1322; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s6 1323; CHECK-FP-NEXT: vmaxnm.f32 s0, s4, s0 1324; CHECK-FP-NEXT: bx lr 1325; 1326; CHECK-NOFP-LABEL: fmax_v4f32_acc: 1327; CHECK-NOFP: @ %bb.0: @ %entry 1328; CHECK-NOFP-NEXT: vmaxnm.f32 s6, s0, s1 1329; CHECK-NOFP-NEXT: vmaxnm.f32 s6, s6, s2 1330; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s6, s3 1331; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s0 1332; CHECK-NOFP-NEXT: bx lr 1333entry: 1334 %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x) 1335 %c = fcmp fast ogt float %y, %z 1336 %r = select i1 %c, float %y, float %z 1337 ret float %r 1338} 1339 1340define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) { 1341; CHECK-FP-LABEL: fmax_v8f32_acc: 1342; CHECK-FP: @ %bb.0: @ %entry 1343; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 1344; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 1345; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 1346; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 1347; CHECK-FP-NEXT: vmaxnm.f32 s0, s8, s0 1348; CHECK-FP-NEXT: bx lr 1349; 1350; CHECK-NOFP-LABEL: fmax_v8f32_acc: 1351; CHECK-NOFP: @ %bb.0: @ %entry 1352; CHECK-NOFP-NEXT: vcmp.f32 s1, s5 1353; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1354; CHECK-NOFP-NEXT: vcmp.f32 s0, s4 1355; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5 1356; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1357; CHECK-NOFP-NEXT: vcmp.f32 s2, s6 1358; CHECK-NOFP-NEXT: vselgt.f32 s12, s0, s4 1359; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1360; CHECK-NOFP-NEXT: vcmp.f32 s3, s7 1361; CHECK-NOFP-NEXT: vselgt.f32 s14, s2, s6 1362; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1363; CHECK-NOFP-NEXT: vselgt.f32 s0, s3, s7 1364; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s12, s10 1365; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s14 1366; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s2, s0 1367; 
CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0 1368; CHECK-NOFP-NEXT: bx lr 1369entry: 1370 %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x) 1371 %c = fcmp fast ogt float %y, %z 1372 %r = select i1 %c, float %y, float %z 1373 ret float %r 1374} 1375 1376define arm_aapcs_vfpcc void @fmax_v2f16_acc(<2 x half> %x, half* %yy) { 1377; CHECK-LABEL: fmax_v2f16_acc: 1378; CHECK: @ %bb.0: @ %entry 1379; CHECK-NEXT: vmovx.f16 s4, s0 1380; CHECK-NEXT: vmaxnm.f16 s0, s0, s4 1381; CHECK-NEXT: vldr.16 s2, [r0] 1382; CHECK-NEXT: vmaxnm.f16 s0, s2, s0 1383; CHECK-NEXT: vstr.16 s0, [r0] 1384; CHECK-NEXT: bx lr 1385entry: 1386 %y = load half, half* %yy 1387 %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x) 1388 %c = fcmp fast ogt half %y, %z 1389 %r = select i1 %c, half %y, half %z 1390 store half %r, half* %yy 1391 ret void 1392} 1393 1394define arm_aapcs_vfpcc void @fmax_v4f16_acc(<4 x half> %x, half* %yy) { 1395; CHECK-FP-LABEL: fmax_v4f16_acc: 1396; CHECK-FP: @ %bb.0: @ %entry 1397; CHECK-FP-NEXT: vmovx.f16 s4, s1 1398; CHECK-FP-NEXT: vmovx.f16 s6, s0 1399; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6 1400; CHECK-FP-NEXT: vmaxnm.f16 s4, s1, s4 1401; CHECK-FP-NEXT: vldr.16 s2, [r0] 1402; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1403; CHECK-FP-NEXT: vmaxnm.f16 s0, s2, s0 1404; CHECK-FP-NEXT: vstr.16 s0, [r0] 1405; CHECK-FP-NEXT: bx lr 1406; 1407; CHECK-NOFP-LABEL: fmax_v4f16_acc: 1408; CHECK-NOFP: @ %bb.0: @ %entry 1409; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 1410; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 1411; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 1412; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 1413; CHECK-NOFP-NEXT: vldr.16 s2, [r0] 1414; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 1415; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s2, s0 1416; CHECK-NOFP-NEXT: vstr.16 s0, [r0] 1417; CHECK-NOFP-NEXT: bx lr 1418entry: 1419 %y = load half, half* %yy 1420 %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x) 1421 %c = fcmp fast ogt half %y, %z 1422 %r = select i1 %c, half %y, half %z 
1423 store half %r, half* %yy 1424 ret void 1425} 1426 1427define arm_aapcs_vfpcc void @fmax_v8f16_acc(<8 x half> %x, half* %yy) { 1428; CHECK-FP-LABEL: fmax_v8f16_acc: 1429; CHECK-FP: @ %bb.0: @ %entry 1430; CHECK-FP-NEXT: vrev32.16 q1, q0 1431; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1432; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 1433; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1434; CHECK-FP-NEXT: vldr.16 s2, [r0] 1435; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1436; CHECK-FP-NEXT: vmaxnm.f16 s0, s2, s0 1437; CHECK-FP-NEXT: vstr.16 s0, [r0] 1438; CHECK-FP-NEXT: bx lr 1439; 1440; CHECK-NOFP-LABEL: fmax_v8f16_acc: 1441; CHECK-NOFP: @ %bb.0: @ %entry 1442; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 1443; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 1444; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 1445; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1446; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 1447; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 1448; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 1449; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s2 1450; CHECK-NOFP-NEXT: vldr.16 s2, [r0] 1451; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 1452; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s3 1453; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 1454; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s2, s0 1455; CHECK-NOFP-NEXT: vstr.16 s0, [r0] 1456; CHECK-NOFP-NEXT: bx lr 1457entry: 1458 %y = load half, half* %yy 1459 %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x) 1460 %c = fcmp fast ogt half %y, %z 1461 %r = select i1 %c, half %y, half %z 1462 store half %r, half* %yy 1463 ret void 1464} 1465 1466define arm_aapcs_vfpcc void @fmax_v16f16_acc(<16 x half> %x, half* %yy) { 1467; CHECK-FP-LABEL: fmax_v16f16_acc: 1468; CHECK-FP: @ %bb.0: @ %entry 1469; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1470; CHECK-FP-NEXT: vrev32.16 q1, q0 1471; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1472; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 1473; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1474; CHECK-FP-NEXT: vldr.16 s2, [r0] 1475; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1476; CHECK-FP-NEXT: vmaxnm.f16 s0, s2, s0 
1477; CHECK-FP-NEXT: vstr.16 s0, [r0] 1478; CHECK-FP-NEXT: bx lr 1479; 1480; CHECK-NOFP-LABEL: fmax_v16f16_acc: 1481; CHECK-NOFP: @ %bb.0: @ %entry 1482; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 1483; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 1484; CHECK-NOFP-NEXT: vcmp.f16 s10, s8 1485; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 1486; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1487; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 1488; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8 1489; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1490; CHECK-NOFP-NEXT: vcmp.f16 s1, s5 1491; CHECK-NOFP-NEXT: vselgt.f16 s10, s0, s4 1492; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1493; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 1494; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 1495; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1496; CHECK-NOFP-NEXT: vselgt.f16 s10, s1, s5 1497; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1498; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 1499; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 1500; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1501; CHECK-NOFP-NEXT: vcmp.f16 s2, s6 1502; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 1503; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1504; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1505; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 1506; CHECK-NOFP-NEXT: vselgt.f16 s10, s2, s6 1507; CHECK-NOFP-NEXT: vldr.16 s2, [r0] 1508; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1509; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 1510; CHECK-NOFP-NEXT: vcmp.f16 s12, s10 1511; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1512; CHECK-NOFP-NEXT: vcmp.f16 s3, s7 1513; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10 1514; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1515; CHECK-NOFP-NEXT: vcmp.f16 s0, s4 1516; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1517; CHECK-NOFP-NEXT: vselgt.f16 s10, s3, s7 1518; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1519; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1520; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4 1521; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 1522; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s2, s0 1523; CHECK-NOFP-NEXT: vstr.16 s0, [r0] 1524; CHECK-NOFP-NEXT: bx lr 
1525entry: 1526 %y = load half, half* %yy 1527 %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x) 1528 %c = fcmp fast ogt half %y, %z 1529 %r = select i1 %c, half %y, half %z 1530 store half %r, half* %yy 1531 ret void 1532} 1533 1534define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) { 1535; CHECK-LABEL: fmax_v1f64_acc: 1536; CHECK: @ %bb.0: @ %entry 1537; CHECK-NEXT: vmaxnm.f64 d0, d1, d0 1538; CHECK-NEXT: bx lr 1539entry: 1540 %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x) 1541 %c = fcmp fast ogt double %y, %z 1542 %r = select i1 %c, double %y, double %z 1543 ret double %r 1544} 1545 1546define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) { 1547; CHECK-LABEL: fmax_v2f64_acc: 1548; CHECK: @ %bb.0: @ %entry 1549; CHECK-NEXT: vmaxnm.f64 d0, d0, d1 1550; CHECK-NEXT: vmaxnm.f64 d0, d2, d0 1551; CHECK-NEXT: bx lr 1552entry: 1553 %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x) 1554 %c = fcmp fast ogt double %y, %z 1555 %r = select i1 %c, double %y, double %z 1556 ret double %r 1557} 1558 1559define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) { 1560; CHECK-LABEL: fmax_v4f64_acc: 1561; CHECK: @ %bb.0: @ %entry 1562; CHECK-NEXT: vcmp.f64 d1, d3 1563; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1564; CHECK-NEXT: vcmp.f64 d0, d2 1565; CHECK-NEXT: vselgt.f64 d5, d1, d3 1566; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1567; CHECK-NEXT: vselgt.f64 d0, d0, d2 1568; CHECK-NEXT: vmaxnm.f64 d0, d0, d5 1569; CHECK-NEXT: vmaxnm.f64 d0, d4, d0 1570; CHECK-NEXT: bx lr 1571entry: 1572 %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x) 1573 %c = fcmp fast ogt double %y, %z 1574 %r = select i1 %c, double %y, double %z 1575 ret double %r 1576} 1577 1578define arm_aapcs_vfpcc float @fmax_v2f32_acc_nofast(<2 x float> %x, float %y) { 1579; CHECK-LABEL: fmax_v2f32_acc_nofast: 1580; CHECK: @ %bb.0: @ %entry 1581; CHECK-NEXT: vmaxnm.f32 s0, s0, s1 1582; CHECK-NEXT: 
vcmp.f32 s4, s0 1583; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1584; CHECK-NEXT: vselgt.f32 s0, s4, s0 1585; CHECK-NEXT: bx lr 1586entry: 1587 %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x) 1588 %c = fcmp ogt float %y, %z 1589 %r = select i1 %c, float %y, float %z 1590 ret float %r 1591} 1592 1593define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) { 1594; CHECK-FP-LABEL: fmax_v4f32_acc_nofast: 1595; CHECK-FP: @ %bb.0: @ %entry 1596; CHECK-FP-NEXT: vmaxnm.f32 s6, s2, s3 1597; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 1598; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s6 1599; CHECK-FP-NEXT: vcmp.f32 s4, s0 1600; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr 1601; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0 1602; CHECK-FP-NEXT: bx lr 1603; 1604; CHECK-NOFP-LABEL: fmax_v4f32_acc_nofast: 1605; CHECK-NOFP: @ %bb.0: @ %entry 1606; CHECK-NOFP-NEXT: vmaxnm.f32 s6, s0, s1 1607; CHECK-NOFP-NEXT: vmaxnm.f32 s6, s6, s2 1608; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s6, s3 1609; CHECK-NOFP-NEXT: vcmp.f32 s4, s0 1610; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1611; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0 1612; CHECK-NOFP-NEXT: bx lr 1613entry: 1614 %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x) 1615 %c = fcmp ogt float %y, %z 1616 %r = select i1 %c, float %y, float %z 1617 ret float %r 1618} 1619 1620define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) { 1621; CHECK-FP-LABEL: fmax_v8f32_acc_nofast: 1622; CHECK-FP: @ %bb.0: @ %entry 1623; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1 1624; CHECK-FP-NEXT: vmaxnm.f32 s4, s2, s3 1625; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1 1626; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s4 1627; CHECK-FP-NEXT: vcmp.f32 s8, s0 1628; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr 1629; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0 1630; CHECK-FP-NEXT: bx lr 1631; 1632; CHECK-NOFP-LABEL: fmax_v8f32_acc_nofast: 1633; CHECK-NOFP: @ %bb.0: @ %entry 1634; CHECK-NOFP-NEXT: vmaxnm.f32 s12, s0, s4 1635; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s1, s5 1636; CHECK-NOFP-NEXT: 
vmaxnm.f32 s10, s12, s10 1637; CHECK-NOFP-NEXT: vmaxnm.f32 s12, s2, s6 1638; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s10, s12 1639; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s3, s7 1640; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s10, s0 1641; CHECK-NOFP-NEXT: vcmp.f32 s8, s0 1642; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1643; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0 1644; CHECK-NOFP-NEXT: bx lr 1645entry: 1646 %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x) 1647 %c = fcmp ogt float %y, %z 1648 %r = select i1 %c, float %y, float %z 1649 ret float %r 1650} 1651 1652define arm_aapcs_vfpcc void @fmax_v4f16_acc_nofast(<4 x half> %x, half* %yy) { 1653; CHECK-FP-LABEL: fmax_v4f16_acc_nofast: 1654; CHECK-FP: @ %bb.0: @ %entry 1655; CHECK-FP-NEXT: vmovx.f16 s4, s1 1656; CHECK-FP-NEXT: vmovx.f16 s6, s0 1657; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6 1658; CHECK-FP-NEXT: vmaxnm.f16 s4, s1, s4 1659; CHECK-FP-NEXT: vldr.16 s2, [r0] 1660; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1661; CHECK-FP-NEXT: vcmp.f16 s2, s0 1662; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr 1663; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 1664; CHECK-FP-NEXT: vstr.16 s0, [r0] 1665; CHECK-FP-NEXT: bx lr 1666; 1667; CHECK-NOFP-LABEL: fmax_v4f16_acc_nofast: 1668; CHECK-NOFP: @ %bb.0: @ %entry 1669; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 1670; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 1671; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 1672; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 1673; CHECK-NOFP-NEXT: vldr.16 s2, [r0] 1674; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 1675; CHECK-NOFP-NEXT: vcmp.f16 s2, s0 1676; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1677; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 1678; CHECK-NOFP-NEXT: vstr.16 s0, [r0] 1679; CHECK-NOFP-NEXT: bx lr 1680entry: 1681 %y = load half, half* %yy 1682 %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x) 1683 %c = fcmp ogt half %y, %z 1684 %r = select i1 %c, half %y, half %z 1685 store half %r, half* %yy 1686 ret void 1687} 1688 1689define arm_aapcs_vfpcc void @fmax_v8f16_acc_nofast(<8 x half> %x, half* 
%yy) { 1690; CHECK-FP-LABEL: fmax_v8f16_acc_nofast: 1691; CHECK-FP: @ %bb.0: @ %entry 1692; CHECK-FP-NEXT: vrev32.16 q1, q0 1693; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1694; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 1695; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1696; CHECK-FP-NEXT: vldr.16 s2, [r0] 1697; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4 1698; CHECK-FP-NEXT: vcmp.f16 s2, s0 1699; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr 1700; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 1701; CHECK-FP-NEXT: vstr.16 s0, [r0] 1702; CHECK-FP-NEXT: bx lr 1703; 1704; CHECK-NOFP-LABEL: fmax_v8f16_acc_nofast: 1705; CHECK-NOFP: @ %bb.0: @ %entry 1706; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 1707; CHECK-NOFP-NEXT: vmovx.f16 s6, s1 1708; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s0, s4 1709; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1710; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s1 1711; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 1712; CHECK-NOFP-NEXT: vmovx.f16 s6, s2 1713; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s2 1714; CHECK-NOFP-NEXT: vldr.16 s2, [r0] 1715; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s6 1716; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s4, s3 1717; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0 1718; CHECK-NOFP-NEXT: vcmp.f16 s2, s0 1719; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1720; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 1721; CHECK-NOFP-NEXT: vstr.16 s0, [r0] 1722; CHECK-NOFP-NEXT: bx lr 1723entry: 1724 %y = load half, half* %yy 1725 %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x) 1726 %c = fcmp ogt half %y, %z 1727 %r = select i1 %c, half %y, half %z 1728 store half %r, half* %yy 1729 ret void 1730} 1731 1732define arm_aapcs_vfpcc void @fmax_v16f16_acc_nofast(<16 x half> %x, half* %yy) { 1733; CHECK-FP-LABEL: fmax_v16f16_acc_nofast: 1734; CHECK-FP: @ %bb.0: @ %entry 1735; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1736; CHECK-FP-NEXT: vrev32.16 q1, q0 1737; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1 1738; CHECK-FP-NEXT: vmaxnm.f16 s4, s2, s3 1739; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1 1740; CHECK-FP-NEXT: vldr.16 s2, [r0] 1741; CHECK-FP-NEXT: 
vmaxnm.f16 s0, s0, s4 1742; CHECK-FP-NEXT: vcmp.f16 s2, s0 1743; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr 1744; CHECK-FP-NEXT: vselgt.f16 s0, s2, s0 1745; CHECK-FP-NEXT: vstr.16 s0, [r0] 1746; CHECK-FP-NEXT: bx lr 1747; 1748; CHECK-NOFP-LABEL: fmax_v16f16_acc_nofast: 1749; CHECK-NOFP: @ %bb.0: @ %entry 1750; CHECK-NOFP-NEXT: vmovx.f16 s8, s4 1751; CHECK-NOFP-NEXT: vmovx.f16 s10, s0 1752; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 1753; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s0, s4 1754; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8 1755; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s1, s5 1756; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1757; CHECK-NOFP-NEXT: vmovx.f16 s10, s5 1758; CHECK-NOFP-NEXT: vmovx.f16 s12, s1 1759; CHECK-NOFP-NEXT: vmovx.f16 s4, s7 1760; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 1761; CHECK-NOFP-NEXT: vmovx.f16 s12, s2 1762; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1763; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s2, s6 1764; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1765; CHECK-NOFP-NEXT: vmovx.f16 s10, s6 1766; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10 1767; CHECK-NOFP-NEXT: vmovx.f16 s0, s3 1768; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1769; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s3, s7 1770; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s8, s10 1771; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4 1772; CHECK-NOFP-NEXT: vldr.16 s2, [r0] 1773; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0 1774; CHECK-NOFP-NEXT: vcmp.f16 s2, s0 1775; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr 1776; CHECK-NOFP-NEXT: vselgt.f16 s0, s2, s0 1777; CHECK-NOFP-NEXT: vstr.16 s0, [r0] 1778; CHECK-NOFP-NEXT: bx lr 1779entry: 1780 %y = load half, half* %yy 1781 %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x) 1782 %c = fcmp ogt half %y, %z 1783 %r = select i1 %c, half %y, half %z 1784 store half %r, half* %yy 1785 ret void 1786} 1787 1788define arm_aapcs_vfpcc double @fmax_v1f64_acc_nofast(<1 x double> %x, double %y) { 1789; CHECK-LABEL: fmax_v1f64_acc_nofast: 1790; CHECK: @ %bb.0: @ %entry 1791; CHECK-NEXT: vcmp.f64 d1, d0 1792; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr 1793; CHECK-NEXT: vselgt.f64 d0, d1, d0 1794; CHECK-NEXT: bx lr 1795entry: 1796 %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x) 1797 %c = fcmp ogt double %y, %z 1798 %r = select i1 %c, double %y, double %z 1799 ret double %r 1800} 1801 1802define arm_aapcs_vfpcc double @fmax_v2f64_acc_nofast(<2 x double> %x, double %y) { 1803; CHECK-LABEL: fmax_v2f64_acc_nofast: 1804; CHECK: @ %bb.0: @ %entry 1805; CHECK-NEXT: vmaxnm.f64 d0, d0, d1 1806; CHECK-NEXT: vcmp.f64 d2, d0 1807; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1808; CHECK-NEXT: vselgt.f64 d0, d2, d0 1809; CHECK-NEXT: bx lr 1810entry: 1811 %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x) 1812 %c = fcmp ogt double %y, %z 1813 %r = select i1 %c, double %y, double %z 1814 ret double %r 1815} 1816 1817define arm_aapcs_vfpcc double @fmax_v4f64_acc_nofast(<4 x double> %x, double %y) { 1818; CHECK-LABEL: fmax_v4f64_acc_nofast: 1819; CHECK: @ %bb.0: @ %entry 1820; CHECK-NEXT: vmaxnm.f64 d5, d1, d3 1821; CHECK-NEXT: vmaxnm.f64 d0, d0, d2 1822; CHECK-NEXT: vmaxnm.f64 d0, d0, d5 1823; CHECK-NEXT: vcmp.f64 d4, d0 1824; CHECK-NEXT: vmrs APSR_nzcv, fpscr 1825; CHECK-NEXT: vselgt.f64 d0, d4, d0 1826; CHECK-NEXT: bx lr 1827entry: 1828 %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x) 1829 %c = fcmp ogt double %y, %z 1830 %r = select i1 %c, double %y, double %z 1831 ret double %r 1832} 1833 1834declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>) 1835declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) 1836declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) 1837declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>) 1838declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) 1839declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) 1840declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) 1841declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) 1842declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) 
; Intrinsic declarations (continued): float fmin reductions, followed by the
; half fmax and half fmin reductions, each group listed by increasing vector
; width.
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)

declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)