; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Every test below has the same core pattern: three half operands are extended
; to float, combined with llvm.fmuladd.f32, and the result is truncated back to
; half. The tests differ in how that half is placed into the returned value.
;
; The instruction checks spell out the complete operand and modifier list and
; are anchored with {{$}}, so a dropped or unexpected trailing modifier (for
; example a stray clamp) is reported as a failure instead of silently matching
; as a substring.

; Result goes into element 1 of a <2 x half> whose element 0 is undef; on
; gfx900 a single v_mad_mixhi_f16 writing v0 is expected.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Element 0 is the constant half 1.0 (0x3c00). The constant is materialized in
; v3 first, v_mad_mixhi_f16 then writes into v3, and v3 is copied to v0.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec.result = insertelement <2 x half> <half 1.0, half undef>, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Element 0 comes from the fourth argument %lo. v_mad_mixhi_f16 is expected to
; write into v3 and the result is then copied to v0.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec = insertelement <2 x half> undef, half %lo, i32 0
  %vec.result = insertelement <2 x half> %vec, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; The half result is moved to the upper 16 bits with integer operations
; (bitcast, zext, shl by 16) instead of a vector insert. The expected code is
; v_mad_mixlo_f16 followed by an explicit shift, not v_mad_mixhi_f16.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %bc = bitcast half %cvt.result to i16
  %ext = zext i16 %bc to i32
  %shl = shl i32 %ext, 16
  ret i32 %shl
}

; Same as the intpack test, but the i16 is sign-extended before the shift. The
; shift by 16 discards the extended bits, and the expected code is identical.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT: s_setpc_b64
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %bc = bitcast half %cvt.result to i16
  %ext = sext i16 %bc to i32
  %shl = shl i32 %ext, 16
  ret i32 %shl
}

; The float result is clamped to [0.0, 1.0] with maxnum/minnum before the
; truncation to half. The expected code applies the clamp on v_mad_mix_f32 and
; converts to the high half with a separate SDWA conversion.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  %cvt.result = fptrunc float %clamp to half
  %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; The clamp to [0.0, 1.0] is applied to the half value after the truncation.
; The expected code folds it into v_mad_mixhi_f16 as the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
  ret <2 x half> %vec.result
}


; As the postcvt test, but the unclamped half is also stored with a volatile
; store, so it has a second use. The expected code computes the unclamped value
; with v_mad_mixlo_f16 for the store and the clamped value with a separate
; v_mad_mixhi_f16.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v3
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  store volatile half %cvt.result, half addrspace(1)* undef
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
  ret <2 x half> %vec.result
}

declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1

attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone speculatable }