; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE

; Check generated fp16 fused MAC and MLS.
;
; Two configurations are exercised:
;   CHECK     - +fullfp16 only.
;   DONT-FUSE - +fullfp16,+slowfpvfmx.
; The fusedMACTest* functions use separate fmul/fadd/fsub and expect different
; code under the two prefixes; every function that calls @llvm.fma.f16
; explicitly expects identical code under both prefixes.

; f1 * f2 + f3 written as fmul + fadd: vfma.f16 for CHECK, vmul + vadd for
; DONT-FUSE.
define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
; DONT-FUSE-NEXT:    vadd.f16 s0, s0, s2
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %1 = fmul half %f1, %f2
  %2 = fadd half %1, %f3
  store half %2, half *%a1, align 2
  ret void
}

; f1 - f2 * f3 written as fmul + fsub: vfms.f16 for CHECK, vmul + vsub for
; DONT-FUSE.
define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r2]
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vldr.16 s4, [r0]
; CHECK-NEXT:    vfms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest4:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r2]
; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vsub.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %1 = fmul half %f2, %f3
  %2 = fsub half %f1, %1
  store half %2, half *%a1, align 2
  ret void
}

; -(f1 * f2) - f3 written as fmul + two fsubs: vfnma.f16 for CHECK,
; vnmul + vsub for DONT-FUSE.
define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest6:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vnmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
; DONT-FUSE-NEXT:    vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %1 = fmul half %f1, %f2
  %2 = fsub half -0.0, %1
  %3 = fsub half %2, %f3
  store half %3, half *%a1, align 2
  ret void
}

; f1 * f2 - f3 written as fmul + fsub: vfnms.f16 for CHECK, vmul + vsub for
; DONT-FUSE.
define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest8:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
; DONT-FUSE-NEXT:    vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %1 = fmul half %f1, %f2
  %2 = fsub half %1, %f3
  store half %2, half *%a1, align 2
  ret void
}

; Explicit fma(a, b, c): vfma.f16 under both prefixes.
; NOTE(review): the function is marked readnone yet stores through %aa;
; the attribute looks wrong but is kept as-is -- confirm before removing.
define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fma_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fma_f16:
; DONT-FUSE:       @ %bb.0: @ %entry
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr
entry:
  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %c = load half, half *%cc, align 2
  %tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
  store half %tmp1, half *%aa, align 2
  ret void
}

; Explicit fma(a, b, -c): vfnms.f16 under both prefixes.
; The negated addend %tmp2 is what is passed to the intrinsic, so this covers
; the fnms form rather than repeating the plain fma case. The IR has the same
; shape as @fnms1, and the expected lines below are taken from that test
; rather than regenerated.
; NOTE(review): the function is marked readnone yet stores through %aa;
; the attribute looks wrong but is kept as-is -- confirm before removing.
define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fnms_f16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fnms_f16:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %c = load half, half *%cc, align 2
  %tmp2 = fsub half -0.0, %c
  %tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
  store half %tmp3, half *%aa, align 2
  ret void
}

; fma(a, 1.0, b): the multiply by one folds away, leaving a plain vadd.f16.
define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vadd.f16 s0, s2, s0
; CHECK-NEXT:    vstr.16 s0, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fma_const_fold:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vadd.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %ret = call half @llvm.fma.f16(half %a, half 1.0, half %b)
  store half %ret, half *%aa, align 2
  ret void
}

; fma(2.0, a, b): the constant is materialised with vmov.f16 and appears as
; the last multiplicand of vfma.f16.
define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vmov.f16 s4, #2.000000e+00
; CHECK-NEXT:    vfma.f16 s2, s0, s4
; CHECK-NEXT:    vstr.16 s2, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fma_canonicalize:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
; DONT-FUSE-NEXT:    vmov.f16 s4, #2.000000e+00
; DONT-FUSE-NEXT:    vfma.f16 s2, s0, s4
; DONT-FUSE-NEXT:    vstr.16 s2, [r0]
; DONT-FUSE-NEXT:    bx lr

  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %ret = call half @llvm.fma.f16(half 2.0, half %a, half %b)
  store half %ret, half *%aa, align 2
  ret void
}

; fma(-f1, f2, f3): vfms.f16 under both prefixes.
define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %s = fsub half -0.0, %f1
  %ret = call half @llvm.fma.f16(half %s, half %f2, half %f3)
  store half %ret, half *%a1, align 2
  ret void
}

; fma(f2, -f1, f3): same as fms1 with the negated value as the second
; multiplicand; vfms.f16 under both prefixes.
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %s = fsub half -0.0, %f1
  %ret = call half @llvm.fma.f16(half %f2, half %s, half %f3)
  store half %ret, half *%a1, align 2
  ret void
}

; -fma(f1, f2, f3): vfnma.f16 under both prefixes.
define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3)
  %n1 = fsub half -0.0, %fma
  store half %n1, half *%a1, align 2
  ret void
}

; fma(-f1, f2, -f3): vfnma.f16 under both prefixes.
define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnma2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %n1 = fsub half -0.0, %f1
  %n3 = fsub half -0.0, %f3
  %ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3)
  store half %ret, half *%a1, align 2
  ret void
}

; fma(f1, f2, -f3): vfnms.f16 under both prefixes.
define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnms1:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %n3 = fsub half -0.0, %f3
  %ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3)
  store half %ret, half *%a1, align 2
  ret void
}

; -fma(-f1, f2, f3): vfnms.f16 under both prefixes.
define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %n1 = fsub half -0.0, %f1
  %fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3)
  %n = fsub half -0.0, %fma
  store half %n, half *%a1, align 2
  ret void
}

; -fma(f1, -f2, f3): vfnms.f16 under both prefixes (multiplicand registers
; are loaded in the opposite order from fnms2).
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %f1 = load half, half *%a1, align 2
  %f2 = load half, half *%a2, align 2
  %f3 = load half, half *%a3, align 2
  %n2 = fsub half -0.0, %f2
  %fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3)
  %n1 = fsub half -0.0, %fma
  store half %n1, half *%a1, align 2
  ret void
}


declare half @llvm.fma.f16(half, half, half) nounwind readnone