; RUN: llc -march=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s
; RUN: llc -march=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s

; RUN: llc -march=amdgcn -mattr=+fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s
; RUN: llc -march=amdgcn -mattr=-fast-fmaf,+mad-mac-f32-insts -denormal-fp-math-f32=ieee -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s

; FIXME: This should also fold when fma is actually fast if an FMA
; exists in the original program.

; (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
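;
; The four RUN lines exercise three codegen configurations:
;   GCN-FLUSH:   f32 denormals are flushed, so the fold can use the
;                single-precision v_mad/v_mac instructions regardless of
;                whether fma is fast.
;   GCN-FASTFMA: IEEE f32 denormals with fast fma, so v_fma_f32 is used.
;   GCN-SLOWFMA: IEEE f32 denormals without fast fma, so the fmuladd is
;                expanded into separate multiplies and adds.
;
; As a rough IR-level sketch of the fold (illustrative only, not part of
; the checked output), the input pattern
;
;   %mul = fmul fast float %u, %v
;   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul)
;   %add = fadd fast float %fma, %z
;
; becomes the equivalent of
;
;   %inner = call fast float @llvm.fmuladd.f32(float %u, float %v, float %z)
;   %add   = call fast float @llvm.fmuladd.f32(float %x, float %y, float %inner)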

; GCN-LABEL: {{^}}fast_add_fmuladd_fmul:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-FLUSH: v_mac_f32_e32 [[Z]], [[U]], [[V]]
; GCN-FLUSH-NEXT: v_mac_f32_e32 [[Z]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: buffer_store_dword [[Z]]

; GCN-FASTFMA: v_fma_f32 [[FMA0:v[0-9]+]], [[U]], [[V]], [[Z]]
; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[FMA0]]
; GCN-FASTFMA: buffer_store_dword [[FMA1]]

; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
define amdgpu_kernel void @fast_add_fmuladd_fmul() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fadd fast float %fma, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_sub_fmuladd_fmul:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-FLUSH: v_mad_f32 [[TMP:v[0-9]+]], [[U]], [[V]], -[[Z]]
; GCN-FLUSH-NEXT: v_mac_f32_e32 [[TMP]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: buffer_store_dword [[TMP]]

; GCN-FASTFMA: v_fma_f32 [[FMA0:v[0-9]+]], [[U]], [[V]], -[[Z]]
; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[FMA0]]
; GCN-FASTFMA: buffer_store_dword [[FMA1]]
define amdgpu_kernel void @fast_sub_fmuladd_fmul() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fsub fast float %fma, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}
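
; In the multi-use tests below the intermediate multiply result is also
; stored, so folding it away would not eliminate the v_mul; the checks
; expect the multiply to remain and only the outer fmuladd to become a
; mac/fma.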
; GCN-LABEL: {{^}}fast_add_fmuladd_fmul_multi_use_mul:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-FLUSH-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
; GCN-FLUSH-DAG: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
; GCN-FLUSH: v_add_f32_e32 v{{[0-9]+}}, [[MUL]], [[Z]]

; GCN-FASTFMA: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA: v_add_f32_e32 v{{[0-9]+}}, [[FMA1]], [[Z]]

; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  store volatile float %mul.u.v, float addrspace(1)* undef
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fadd fast float %fma, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_add_fmuladd_fmul_multi_use_mul_commute:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-FLUSH-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
; GCN-FLUSH-DAG: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
; GCN-FLUSH: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[MUL]]

; GCN-FASTFMA: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]
; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA: v_add_f32_e32 v{{[0-9]+}}, [[Z]], [[FMA1]]

; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul_commute() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  store volatile float %mul.u.v, float addrspace(1)* undef
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fadd fast float %z, %fma
  store volatile float %add, float addrspace(1)* undef
  ret void
}
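
; In the next two tests the fmuladd result itself has a second use and must
; be materialized as written; beyond the shared load checks, only the
; GCN-SLOWFMA expansion is pinned down here.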
; GCN-LABEL: {{^}}fast_add_fmuladd_fmul_multi_use_fmuladd:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  store volatile float %fma, float addrspace(1)* undef
  %add = fadd fast float %fma, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_add_fmuladd_fmul_multi_use_fmuladd_commute:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_mul_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_add_f32_e32
define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd_commute() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  store volatile float %fma, float addrspace(1)* undef
  %add = fadd fast float %z, %fma
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_sub_fmuladd_fmul_multi_use_mul:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]

; GCN-FLUSH: v_mad_f32 [[MAD:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FLUSH: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MAD]], [[Z]]

; GCN-FASTFMA: v_fma_f32 [[MAD:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MAD]], [[Z]]

; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_sub_f32_e32 [[MAD:v[0-9]+]]

; GCN: buffer_store_dword [[MUL]]
; GCN: buffer_store_dword [[MAD]]
define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_mul() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fsub fast float %fma, %z
  store volatile float %mul.u.v, float addrspace(1)* undef
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]

; GCN-FLUSH-NEXT: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MUL]], [[Z]]
; GCN-FLUSH-NEXT: buffer_store_dword [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[SUB]]

; GCN-FASTFMA-NEXT: v_fma_f32 [[FMA:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[FMA]], [[Z]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[SUB]]

; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_sub_f32_e32
define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fsub fast float %fma, %z
  store volatile float %fma, float addrspace(1)* undef
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[U:v[0-9]+]]
; GCN: buffer_load_dword [[V:v[0-9]+]]

; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[U]], [[V]]

; GCN-FLUSH-NEXT: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[SUB]]

; GCN-FASTFMA-NEXT: v_fma_f32 [[FMA:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[SUB]]

; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_sub_f32_e32
define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile float, float addrspace(1)* undef
  %v = load volatile float, float addrspace(1)* undef
  %mul.u.v = fmul fast float %u, %v
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fsub fast float %z, %fma
  store volatile float %fma, float addrspace(1)* undef
  store volatile float %add, float addrspace(1)* undef
  ret void
}
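
; The fpext variants below compute the multiply in half precision and extend
; the result to f32 before it feeds the fmuladd; in the checked output the
; f16 multiply is promoted, with both inputs converted by v_cvt_f32_f16 and
; multiplied in f32.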
; GCN-LABEL: {{^}}fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_ushort [[U:v[0-9]+]]
; GCN: buffer_load_ushort [[V:v[0-9]+]]

; GCN-DAG: v_cvt_f32_f16_e32 [[UFLOAT:v[0-9]+]], [[U]]
; GCN-DAG: v_cvt_f32_f16_e32 [[VFLOAT:v[0-9]+]], [[V]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[UFLOAT]], [[VFLOAT]]

; GCN-FLUSH-NEXT: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[MUL]], [[Z]]
; GCN-FLUSH-NEXT: buffer_store_dword [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[SUB]]

; GCN-FASTFMA-NEXT: v_fma_f32 [[FMA:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[FMA]], [[Z]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[SUB]]

; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_sub_f32_e32
define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile half, half addrspace(1)* undef
  %v = load volatile half, half addrspace(1)* undef
  %mul.u.v.half = fmul fast half %u, %v
  %mul.u.v = fpext half %mul.u.v.half to float
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fsub fast float %fma, %z
  store volatile float %fma, float addrspace(1)* undef
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_ushort [[U:v[0-9]+]]
; GCN: buffer_load_ushort [[V:v[0-9]+]]

; GCN-DAG: v_cvt_f32_f16_e32 [[UFLOAT:v[0-9]+]], [[U]]
; GCN-DAG: v_cvt_f32_f16_e32 [[VFLOAT:v[0-9]+]], [[V]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[UFLOAT]], [[VFLOAT]]

; GCN-FLUSH-NEXT: v_mac_f32_e32 [[MUL]], [[X]], [[Y]]
; GCN-FLUSH-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[MUL]]
; GCN-FLUSH-NEXT: buffer_store_dword [[SUB]]

; GCN-FASTFMA-NEXT: v_fma_f32 [[FMA:v[0-9]+]], [[X]], [[Y]], [[MUL]]
; GCN-FASTFMA-NEXT: v_sub_f32_e32 [[SUB:v[0-9]+]], [[Z]], [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[FMA]]
; GCN-FASTFMA-NEXT: buffer_store_dword [[SUB]]

; GCN-SLOWFMA-DAG: v_mul_f32_e32 v{{[0-9]+}}, [[X]], [[Y]]
; GCN-SLOWFMA: v_add_f32_e32
; GCN-SLOWFMA: v_sub_f32_e32
define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs() #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %u = load volatile half, half addrspace(1)* undef
  %v = load volatile half, half addrspace(1)* undef
  %mul.u.v.half = fmul fast half %u, %v
  %mul.u.v = fpext half %mul.u.v.half to float
  %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
  %add = fsub fast float %z, %fma
  store volatile float %fma, float addrspace(1)* undef
  store volatile float %add, float addrspace(1)* undef
  ret void
}

declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }