; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
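
; The GCN-SAFE run uses the default FP environment, where folding an fneg
; through an arithmetic operation could flip the sign of a zero result, so
; those checks expect the original instruction followed by an explicit
; sign-bit v_xor_b32.  The GCN-NSZ run adds -enable-no-signed-zeros-fp-math,
; which permits folding the fneg into the operation's source modifiers.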

; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
; GCN: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000,

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NSZ-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------
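
; Negating a multiply is always free: fneg(a * b) is exactly a * (-b), with
; no signed-zero hazard, so the fneg folds into a source modifier and both
; runs share the same checks.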

; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[MUL]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
; GCN-NEXT: buffer_store_dword [[MUL0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------
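
; fneg(minnum(a, b)) is maxnum(-a, -b), so the negation is absorbed by
; flipping the min to a max and folding the fnegs into source modifiers on
; both runs.  The constant-operand tests check the same fold with the
; constant negated in place.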

; GCN-LABEL: {{^}}v_fneg_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fsub float -0.0, %min
  store float %min.fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------
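
; Mirror of the fminnum tests: fneg(maxnum(a, b)) becomes minnum(-a, -b).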

; GCN-LABEL: {{^}}v_fneg_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_self_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float %a, float %a)
  %max.fneg = fsub float -0.0, %max
  store float %max.fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %max
  %use1 = fmul float %max, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------
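
; fneg(fma(a, b, c)) folds to fma(a, -b, -c) only when signed zeros can be
; ignored, so the SAFE runs expect the fma followed by a sign-bit xor while
; the NSZ runs expect negated source modifiers.  fnegs on the operands can
; additionally cancel against the folded negation.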

; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]

; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fma, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]

; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]

; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_xor_b32
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
; GCN-SAFE: v_xor_b32

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[FMA]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NSZ-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmad tests
; --------------------------------------------------------------------------------
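
; Same folds as the fma tests above, but through llvm.fmuladd, which the
; checks show selecting to v_mac_f32/v_mad_f32 on this target.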

; GCN-LABEL: {{^}}v_fneg_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]

; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]

; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]

; GCN: buffer_store_dword [[NEG_MAD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fp_extend tests
; --------------------------------------------------------------------------------
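
; An fneg of a conversion folds into the conversion's source modifier
; (v_cvt_f64_f32_e64 with a negated input), and fneg(fpext(fneg x)) cancels
; to a plain conversion.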

; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: buffer_store_dwordx2 [[RESULT]]
; GCN: buffer_store_dword [[FNEG_A]]
define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %fpext, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: buffer_store_dwordx2 [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  %mul = fmul double %fpext, 4.0
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %mul, double addrspace(1)* %out.gep
  ret void
}

; FIXME: Source modifiers not folded for f16->f32
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %fpext = fpext half %a to float
  %fneg = fsub float -0.000000e+00, %fpext
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpext, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %fpext = fpext half %a to float
  %fneg = fsub float -0.000000e+00, %fpext
  %mul = fmul float %fpext, 4.0
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fp_round tests
; --------------------------------------------------------------------------------
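
; The same source-modifier folds apply to fptrunc: f64->f32 and f32->f16
; rounds absorb the fneg, and a double negation cancels to a plain convert.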
; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile double %fneg.a, double addrspace(1)* undef
  ret void
}

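; When the negated f64 input has a second foldable user, the negation is
; expected to be absorbed as a source modifier by both the conversion and
; the multiply instead of being materialized.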
; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dwordx2 [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fsub float -0.000000e+00, %fpround
  %use1 = fmul double %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile double %use1, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_short [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpround = fptrunc float %a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: buffer_store_short [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: buffer_store_dword [[NEG]]
; GCN: buffer_store_dword [[CVT]]
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpround, float addrspace(1)* %out.gep
  ret void
}

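; For f32 -> f16, a store use of the negated input still costs a v_xor_b32
; of the full 32-bit value, while the conversion itself should consume the
; original (or modifier-negated) source.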
; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: buffer_store_short [[RESULT]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: buffer_store_short [[RESULT]]
; GCN: buffer_store_dword [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  %use1 = fmul float %fneg.a, %c
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------

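; 1.0 / (-x) == -(1.0 / x), and v_rcp_f32 takes source modifiers, so
;   fneg (rcp x) -> rcp (-x)
; should fold, and an fneg'd rcp input should cancel with the outer fneg.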
; GCN-LABEL: {{^}}v_fneg_rcp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp_legacy tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fmul_legacy tests
; --------------------------------------------------------------------------------

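; The negation of a legacy multiply is expected to fold like a regular
; multiply here:
;   fneg (mul_legacy a, b) -> mul_legacy a, (-b)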
; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
; GCN-NEXT: buffer_store_dword [[ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

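; A double negation through mul_legacy should cancel back to a plain
; mul_legacy with no source modifiers.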
; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

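; If the fneg'd operand also has a store use, the negation should be
; materialized with a v_xor_b32 while the multiply still folds away the
; double negation.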
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

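; sin is odd, so fneg (sin x) == sin (-x). The llvm.sin.f32 lowering scales
; by 1/(2*pi) before v_fract/v_sin, so the negation should fold into the
; scale constant: 0xbe22f983 is -0.15915494 = -1/(2*pi).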
; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fsub float -0.000000e+00, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fsub float -0.0, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %round = call float @llvm.round.f32(float %a)
  %fneg = fsub float -0.0, %round
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------

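; rint and nearbyint both select v_rndne_f32, which accepts source
; modifiers, so the fneg should fold directly into the operand.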
; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fsub float -0.0, %rint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fsub float -0.0, %nearbyint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %canon = call float @llvm.canonicalize.f32(float %a)
  %fneg = fsub float -0.0, %canon
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

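; The vintrp instructions do not accept source modifiers, so the fneg
; should instead fold backward into the multiply that produces the
; interpolation input.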
; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32 v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32 v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32 v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32 v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: s_cbranch_scc1

; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: buffer_store_dword [[MUL1]]

; GCN: buffer_store_dword [[MUL0]]
define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %cmp0 = icmp eq i32 %d, 0
  br i1 %cmp0, label %if, label %endif

if:
  %mul1 = fmul float %fneg, %c
  store volatile float %mul1, float addrspace(1)* %out.gep
  br label %endif

endif:
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------

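; Inline asm uses cannot take source modifiers either, so the fneg must be
; folded into the instruction defining the value or materialized.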
; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %fneg, float addrspace(1)* %out.gep
  ret void
}

; Can't fold into the use, and the mul has a second user, so the fneg must
; be materialized instead of folded into the source.
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; code size regression tests
; --------------------------------------------------------------------------------

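; Folding an fneg as a source modifier is free for VOP3 instructions, but
; forcing a VOP2 instruction into its 64-bit _e64 encoding to carry the
; modifier costs 4 bytes; these tests weigh that cost against materializing
; the fneg.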
; There are multiple users of the fneg that must use a VOP3 instruction
; anyway, so there is no penalty to folding the modifier.
; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
; GCN-NEXT: buffer_store_dword [[FMA0]]
; GCN-NEXT: buffer_store_dword [[FMA1]]
define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; There are multiple users, but both require using a larger encoding
; for the modifier.

; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
; GCN-NEXT: buffer_store_dword [[MUL0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %mul0 = fmul float %fneg.a, %b
  %mul1 = fmul float %fneg.a, %c

  store volatile float %mul0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

; One user is VOP3, so folding the modifier there has no cost; the other
; user is VOP2 and pays for the larger encoding.
; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]

; GCN: buffer_store_dword [[FMA0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
  %mul1 = fmul float %fneg.a, %c

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

; The use of the fneg requires a code size increase, but folding into
; the source does not.

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]

; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]

; GCN: buffer_store_dword [[MUL1]]
; GCN-NEXT: buffer_store_dword [[MUL2]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
  %fneg.fma0 = fsub float -0.0, %fma0
  %mul1 = fmul float %fneg.fma0, %c
  %mul2 = fmul float %fneg.fma0, %d

  store volatile float %mul1, float addrspace(1)* %out
  store volatile float %mul2, float addrspace(1)* %out
  ret void
}

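; f64 multiplies always use a VOP3 encoding, so keeping the fneg as a
; modifier on both uses of the fma result is already free and the fma
; source should be left unmodified.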
; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]

; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]

; GCN: buffer_store_dwordx2 [[MUL0]]
; GCN: buffer_store_dwordx2 [[MUL1]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %b = load volatile double, double addrspace(1)* %b.gep
  %c = load volatile double, double addrspace(1)* %c.gep
  %d = load volatile double, double addrspace(1)* %d.gep

  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
  %fneg.fma0 = fsub double -0.0, %fma0
  %mul1 = fmul double %fneg.fma0, %c
  %mul2 = fmul double %fneg.fma0, %d

  store volatile double %mul1, double addrspace(1)* %out
  store volatile double %mul2, double addrspace(1)* %out
  ret void
}

; %trunc.a has one fneg use, but it requires a code size increase and
; the fneg can instead be folded for free into the fma.
; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: buffer_store_dword [[FMA0]]
define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  store volatile float %fma0, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: buffer_store_dword [[FMA0]]
; GCN: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  %mul1 = fmul float %trunc.a, %d
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1

declare double @llvm.fma.f64(double, double, double) #1

declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }