; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Each function computes an f32 fmuladd and truncates the result to half.
; The gfx900 checks expect the mixed-precision mad instructions
; (v_mad_mixlo_f16 / v_mad_mixhi_f16 / v_mad_mix_f32); the fiji and hawaii
; checks expect a separate f32 mac and conversion.

; Plain float operands: the expected v_mad_mixlo_f16 has no op_sel_hi
; modifier (the check line ends right after the last source register).
; GCN-LABEL: {{^}}mixlo_simple:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32_e32
; CIVI: v_cvt_f16_f32_e32
define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
  %cvt.result = fptrunc float %result to half
  ret half %cvt.result
}

; All three operands are half values extended to float, so the expected
; instruction has op_sel_hi set for every source.
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f16lo:
; GFX9: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; CI: v_mac_f32
; CIVI: v_cvt_f16_f32
define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  ret half %cvt.result
}

; Only src0 and src1 are extended from half; src2 is already float, so the
; expected op_sel_hi is [1,1,0].
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32
define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  %cvt.result = fptrunc float %result to half
  ret half %cvt.result
}

; The [0.0, 1.0] clamp (maxnum followed by minnum) is applied to the half
; value after the truncate; the gfx900 checks expect it folded into the
; v_mad_mixlo_f16 as the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32_e32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]$}}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  %cvt.result = fptrunc float %result to half
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  ret half %clamp
}

; The clamp is applied to the float value before the truncate; the gfx900
; checks expect v_mad_mix_f32 with clamp followed by a separate conversion.
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  %cvt.result = fptrunc float %clamp to half
  ret half %cvt.result
}

; FIXME: Should be able to avoid extra register because first
; operation only clobbers relevant lane.
; <2 x half> operands: the checks expect one v_mad_mixlo_f16 and one
; v_mad_mixhi_f16 writing the same register (v3), then a copy into v0.
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  ret <2 x half> %cvt.result
}

; Same pattern with <3 x half> operands; the checks expect two mixlo/mixhi
; pairs (results in v6 and v7) copied back into v0 and v1.
; GCN-LABEL: {{^}}v_mad_mix_v3f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mov_b32_e32 v0, v6
; GFX9-NEXT: v_mov_b32_e32 v1, v7
; GFX9-NEXT: s_setpc_b64
define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
  %src0.ext = fpext <3 x half> %src0 to <3 x float>
  %src1.ext = fpext <3 x half> %src1 to <3 x float>
  %src2.ext = fpext <3 x half> %src2 to <3 x float>
  %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
  %cvt.result = fptrunc <3 x float> %result to <3 x half>
  ret <3 x half> %cvt.result
}

; Same pattern with <4 x half> operands.
; GCN-LABEL: {{^}}v_mad_mix_v4f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mov_b32_e32 v0, v7
; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
  %src0.ext = fpext <4 x half> %src0 to <4 x float>
  %src1.ext = fpext <4 x half> %src1 to <4 x float>
  %src2.ext = fpext <4 x half> %src2 to <4 x float>
  %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
  %cvt.result = fptrunc <4 x float> %result to <4 x half>
  ret <4 x half> %cvt.result
}

; The [0.0, 1.0] clamp is applied to the <2 x half> value after the truncate.
; NOTE(review): the checks below already expect the clamp modifier on both
; mix instructions, so the FIXME may be stale - confirm before removing it.
; FIXME: Fold clamp
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt:
; GFX9: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %cvt.result, <2 x half> zeroinitializer)
  %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
  ret <2 x half> %clamp
}

; FIXME: Should be packed into 2 registers per argument?
; <3 x half> fmuladd with the [0.0, 1.0] clamp applied after the truncate;
; every expected mixlo/mixhi instruction carries the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v6
; GFX9-NEXT: v_mov_b32_e32 v1, v7
; GFX9-NEXT: s_setpc_b64
define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
  %src0.ext = fpext <3 x half> %src0 to <3 x float>
  %src1.ext = fpext <3 x half> %src1 to <3 x float>
  %src2.ext = fpext <3 x half> %src2 to <3 x float>
  %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
  %cvt.result = fptrunc <3 x float> %result to <3 x half>
  %max = call <3 x half> @llvm.maxnum.v3f16(<3 x half> %cvt.result, <3 x half> zeroinitializer)
  %clamp = call <3 x half> @llvm.minnum.v3f16(<3 x half> %max, <3 x half> <half 1.0, half 1.0, half 1.0>)
  ret <3 x half> %clamp
}

; <4 x half> version of the post-truncate clamp.
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v6
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
  %src0.ext = fpext <4 x half> %src0 to <4 x float>
  %src1.ext = fpext <4 x half> %src1 to <4 x float>
  %src2.ext = fpext <4 x half> %src2 to <4 x float>
  %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
  %cvt.result = fptrunc <4 x float> %result to <4 x half>
  %max = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %cvt.result, <4 x half> zeroinitializer)
  %clamp = call <4 x half> @llvm.minnum.v4f16(<4 x half> %max, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>)
  ret <4 x half> %clamp
}

; Only element 0 of the truncated vector is clamped, so only the mixlo
; instruction is expected to carry the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_lo:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  %cvt.lo = extractelement <2 x half> %cvt.result, i32 0
  %max.lo = call half @llvm.maxnum.f16(half %cvt.lo, half 0.0)
  %clamp.lo = call half @llvm.minnum.f16(half %max.lo, half 1.0)
  %insert = insertelement <2 x half> %cvt.result, half %clamp.lo, i32 0
  ret <2 x half> %insert
}

; Only element 1 of the truncated vector is clamped, so only the mixhi
; instruction is expected to carry the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_hi:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  %cvt.hi = extractelement <2 x half> %cvt.result, i32 1
  %max.hi = call half @llvm.maxnum.f16(half %cvt.hi, half 0.0)
  %clamp.hi = call half @llvm.minnum.f16(half %max.hi, half 1.0)
  %insert = insertelement <2 x half> %cvt.result, half %clamp.hi, i32 1
  ret <2 x half> %insert
}

; The clamp is applied to the <2 x float> value before the truncate. The
; checks expect two v_mad_mix_f32 with clamp, two conversions, and the
; result packed with v_and_b32 + v_lshl_or_b32.
; FIXME: Should be able to use mixlo/mixhi
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_precvt:
; GFX9: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_cvt_f16_f32_e32 v1, v3
; GFX9: v_cvt_f16_f32_e32 v0, v0
; GFX9: v_and_b32_e32 v1, 0xffff, v1
; GFX9: v_lshl_or_b32 v0, v0, 16, v1
; GFX9: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %result, <2 x float> zeroinitializer)
  %clamp = call <2 x float> @llvm.minnum.v2f32(<2 x float> %max, <2 x float> <float 1.0, float 1.0>)
  %cvt.result = fptrunc <2 x float> %clamp to <2 x half>
  ret <2 x half> %cvt.result
}

; <3 x half> version of the pre-truncate clamp. The checks expect four
; v_mad_mix_f32 and four conversions; the last v_mad_mix_f32 has no clamp
; (presumably the undef 4th component the FIXME refers to - confirm).
; FIXME: Handling undef 4th component
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_precvt:
; GFX9: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]

; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
  %src0.ext = fpext <3 x half> %src0 to <3 x float>
  %src1.ext = fpext <3 x half> %src1 to <3 x float>
  %src2.ext = fpext <3 x half> %src2 to <3 x float>
  %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
  %max = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %result, <3 x float> zeroinitializer)
  %clamp = call <3 x float> @llvm.minnum.v3f32(<3 x float> %max, <3 x float> <float 1.0, float 1.0, float 1.0>)
  %cvt.result = fptrunc <3 x float> %clamp to <3 x half>
  ret <3 x half> %cvt.result
}

; <4 x half> version of the pre-truncate clamp; all four expected
; v_mad_mix_f32 instructions carry the clamp modifier.
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_precvt:
; GFX9: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp

; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
  %src0.ext = fpext <4 x half> %src0 to <4 x float>
  %src1.ext = fpext <4 x half> %src1 to <4 x float>
  %src2.ext = fpext <4 x half> %src2 to <4 x float>
  %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
  %max = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %result, <4 x float> zeroinitializer)
  %clamp = call <4 x float> @llvm.minnum.v4f32(<4 x float> %max, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
  %cvt.result = fptrunc <4 x float> %clamp to <4 x half>
  ret <4 x half> %cvt.result
}

; Intrinsic declarations: half and float minnum/maxnum (scalar, v2, v3, v4)
; and float fmuladd (scalar, v2, v3, v4).
declare half @llvm.minnum.f16(half, half) #1
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) #1

declare half @llvm.maxnum.f16(half, half) #1
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) #1
declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) #1

declare float @llvm.minnum.f32(float, float) #1
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1

declare float @llvm.maxnum.f32(float, float) #1
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1

declare float @llvm.fmuladd.f32(float, float, float) #1
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1

; #0 is attached to every test function; #1 to every intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }