; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s

; Tests for combining an fpext of an f16 fmul (or fma/fmuladd) with a wider
; fadd/fsub. Each function is compiled for gfx900 and gfx803, once with f32
; denormals disabled and once with them enabled (see the four llc invocations
; above). Where a fold is expected, the gfx900 run with f32 denormals disabled
; checks for v_mad_mix_f32; the denormal-enabled run checks for separate
; multiply, convert and add/sub instructions.

; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)

; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_add_f32
define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %mul.ext, %z
  ret float %add
}

; f16->f64 is not free.
; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64:
; GFX89: v_mul_f16
; GFX89: v_cvt_f32_f16
; GFX89: v_cvt_f64_f32
; GFX89: v_add_f64
define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to double
  %add = fadd double %mul.ext, %z
  ret double %add
}

; f32->f64 is not free.
; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64:
; GCN: v_mul_f32
; GCN: v_cvt_f64_f32
; GCN: v_add_f64
define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {
entry:
  %mul = fmul float %x, %y
  %mul.ext = fpext float %mul to double
  %add = fadd double %mul.ext, %z
  ret double %add
}

; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %z, %mul.ext
  ret float %add
}

; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
;   -> (fma x, y, (fma (fpext u), (fpext v), z))

; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_fma_f32
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}

; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
;   -> (fma y, z, (fma (fpext u), (fpext v), x))
; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_fma_f32
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}

; Same shape as fadd_muladd_fpext_fmul_f16_to_f32, but the inner multiply-add
; is written as a separate fmul and fadd, both carrying the contract flag.
; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %mul1 = fmul contract float %x, %y
  %fmad = fadd contract float %mul1, %mul.ext
  %add = fadd float %fmad, %z
  ret float %add
}

; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
;   -> (fma x, y, (fma (fpext u), (fpext v), z))

; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX89: v_mul_f16
; GFX89: v_cvt_f32_f16
; GFX89: v_fma_f32
; GFX89: v_add_f32
define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul contract half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}

; Same as fadd_fma_fpext_fmul_f16_to_f32 with the operands of the final fadd
; swapped.
; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX89: v_mul_f16
; GFX89: v_cvt_f32_f16
; GFX89: v_fma_f32
; GFX89: v_add_f32
define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul contract half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}

; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
;   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))

; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %x, %ext.fma
  ret float %add
}

; Same as fadd_fpext_fmuladd_f16_to_f32, using llvm.fma.f16 in place of
; llvm.fmuladd.f16.
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %x, %ext.fma
  ret float %add
}

; Same as fadd_fpext_fma_f16_to_f32 with the operands of the final fadd
; swapped.
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %ext.fma, %x
  ret float %add
}

; fold (fsub (fpext (fmul x, y)), z)
;   -> (fma (fpext x), (fpext y), (fneg z))

; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fsub float %mul.ext, %z
  ret float %add
}

; fold (fsub x, (fpext (fmul y, z)))
;   -> (fma (fneg (fpext y)), (fpext z), x)

; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32
; GFX9-F32DENORM-NEXT: v_sub_f32_e32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {
entry:
  %mul = fmul contract half %y, %z
  %mul.ext = fpext half %mul to float
  %add = fsub contract float %x, %mul.ext
  ret float %add
}

; fold (fsub (fpext (fneg (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))

; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %neg.mul = fsub half -0.0, %mul
  %neg.mul.ext = fpext half %neg.mul to float
  %add = fsub float %neg.mul.ext, %z
  ret float %add
}

; fold (fsub (fneg (fpext (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))

; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %neg.mul.ext = fsub float -0.0, %mul.ext
  %add = fsub float %neg.mul.ext, %z
  ret float %add
}

; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
;   -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fsub float %fma, %z
  ret float %add
}

; fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
;   -> (fmad (fpext x), (fpext y),
;            (fmad (fpext u), (fpext v), (fneg z)))

; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_sub_f32
; GCN: s_setpc_b64
define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul)
  %fma.ext = fpext half %fma to float
  %add = fsub float %fma.ext, %z
  ret float %add
}

; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
;   -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
  %add = fsub float %x, %fma
  ret float %add
}

; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
;   -> (fma (fneg (fpext y)), (fpext z),
;           (fma (fneg (fpext u)), (fpext v), x))
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64
define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
  %fma.ext = fpext half %fma to float
  %add = fsub float %x, %fma.ext
  ret float %add
}

declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare half @llvm.fmuladd.f16(half, half, half) #0
declare half @llvm.fma.f16(half, half, half) #0

attributes #0 = { nounwind readnone speculatable }