; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Selection tests for v_mad_mixhi_f16. Every function computes an f32
; llvm.fmuladd of three half operands that were fpext'd to float, truncates
; the result back to half, and places it in the high 16 bits of the returned
; value. Instruction-level expectations exist only for the gfx900 invocation;
; the fiji and hawaii invocations verify just the common-prefix directives
; (function labels and, in two tests, the leading s_waitcnt). No directive in
; this file uses a fiji- or hawaii-specific prefix, so none is passed to
; FileCheck for those invocations.

; High element written into an otherwise-undef <2 x half>. The gfx900 checks
; expect a single v_mad_mixhi_f16 whose destination is v0.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Low element is the constant half 1.0. The gfx900 checks expect 0x3c00 to be
; moved into v3 first, then v_mad_mixhi_f16 to write v3, then a copy of v3
; into v0.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec.result = insertelement <2 x half> <half 1.0, half undef>, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Low element comes from the fourth argument %lo. The gfx900 checks expect
; v_mad_mixhi_f16 to write v3 directly (presumably the register %lo arrives
; in -- confirm against the calling convention) followed by a copy to v0.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec = insertelement <2 x half> undef, half %lo, i32 0
  %vec.result = insertelement <2 x half> %vec, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Same high-half placement expressed with integer ops: bitcast to i16, zext to
; i32, shift left by 16. The gfx900 checks expect v3 to be zeroed and then
; written by v_mad_mixhi_f16.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; GFX9: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %bc = bitcast half %cvt.result to i16
  %ext = zext i16 %bc to i32
  %shl = shl i32 %ext, 16
  ret i32 %shl
}

; Variant of the integer packing test that uses sext instead of zext. The
; extended bits are shifted out by the shl, and the gfx900 checks expect the
; same instruction sequence as the zext variant.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
; GFX9: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %bc = bitcast half %cvt.result to i16
  %ext = sext i16 %bc to i32
  %shl = shl i32 %ext, 16
  ret i32 %shl
}

; The [0.0, 1.0] clamp (maxnum then minnum) is applied to the f32 value before
; the fptrunc. The gfx900 checks expect a clamped v_mad_mix_f32 followed by a
; separate v_cvt_f16_f32 rather than a mixhi instruction.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9: v_cvt_f16_f32_e32 v0, v0
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  %cvt.result = fptrunc float %clamp to half
  %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; The [0.0, 1.0] clamp is applied to the half value after the fptrunc. The
; gfx900 checks expect it to be folded into a single v_mad_mixhi_f16 carrying
; the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
  ret <2 x half> %vec.result
}


; Post-conversion clamp where the unclamped truncated value has a second use
; (a volatile store). The gfx900 checks expect an unclamped v_mad_mixlo_f16
; feeding the store plus a separate clamped v_mad_mixhi_f16 for the return
; value.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v3
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  store volatile half %cvt.result, half addrspace(1)* undef
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
  ret <2 x half> %vec.result
}

declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }