; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Instruction-selection checks for f32 reciprocal-shaped divisions.
; Each kernel below writes the result of one fdiv form to %out; the
; directives in front of it state which machine instruction the amdgcn
; and r600 invocations above are expected to produce for that form.

; Plain 1.0 / x with no fast-math flags and no !fpmath annotation
; (attribute group #0 sets "unsafe-fp-math" to "false").
; FUNC-LABEL: {{^}}rcp_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float 1.0, %src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / x carrying the !fpmath annotation !0 (float 2.5, defined at the
; end of the file).
; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; Same as above, with the `fast` flag on the fdiv itself.
; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv fast float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; Same as above, with only the `arcp` flag on the fdiv.
; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv arcp float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; No per-instruction flags; instead the kernel uses attribute group #2,
; which sets "unsafe-fp-math" to "true" for the whole function.
; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %recip = fdiv float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / fabs(x): the amdgcn checks expect the absolute value to appear as
; a |src| operand modifier on the reciprocal instruction.
; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]|
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %abs.src = call float @llvm.fabs.f32(float %src)
  %recip = fdiv float 1.0, %abs.src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; -1.0 / x: the amdgcn checks expect a reciprocal of the negated source
; (-src operand modifier).
; FUNC-LABEL: {{^}}neg_rcp_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[SRC]]
; GCN: buffer_store_dword [[RCP]]

; EG: RECIP_IEEE
define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float -1.0, %src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / (-0.0 - fabs(x)): the negation is written as an fsub from -0.0;
; the amdgcn checks expect both modifiers combined as -|src|.
; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
; GCN: buffer_store_dword [[RCP]]
define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %abs.src = call float @llvm.fabs.f32(float %src)
  %neg.abs.src = fsub float -0.0, %abs.src
  %recip = fdiv float 1.0, %neg.abs.src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; Like the previous kernel, but the negated absolute value has a second
; user (an fmul). The checks expect the -|src| modifier on both the
; reciprocal and the multiply. Both stores are volatile and target the
; same address, so their order is part of the test.
; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_multi_use_f32:
; GCN: s_load_dword [[SRC:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -|[[SRC]]|
; GCN: buffer_store_dword [[RCP]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
  %abs.src = call float @llvm.fabs.f32(float %src)
  %neg.abs.src = fsub float -0.0, %abs.src
  %recip = fdiv float 1.0, %neg.abs.src
  store volatile float %recip, float addrspace(1)* %out, align 4

  %product = fmul float %src, %neg.abs.src
  store volatile float %product, float addrspace(1)* %out, align 4
  ret void
}

; x / 2.0 with `arcp`: expected to become a multiply by the inline
; constant 0.5. The dividend is loaded from an undef pointer, so only the
; arithmetic is being checked here.
; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, 2.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; x / 10.0 with `arcp`: expected to become a multiply by the literal
; 0x3dcccccd (the f32 bit pattern closest to 0.1).
; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, 10.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; x / -10.0 with `arcp`: expected to become a multiply by the literal
; 0xbdcccccd (the same bit pattern as above with the sign bit set).
; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, -10.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

declare float @llvm.fabs.f32(float) #1
; NOTE(review): no function visible in this file calls llvm.sqrt.f32;
; the declaration looks like a leftover -- confirm before removing.
declare float @llvm.sqrt.f32(float) #1

; #0 - kernels compiled with unsafe FP math explicitly disabled.
; #1 - attributes for the intrinsic declarations.
; #2 - kernels compiled with unsafe FP math enabled function-wide.
attributes #0 = { nounwind "unsafe-fp-math"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }

; Operand of the !fpmath annotations used above.
!0 = !{float 2.500000e+00}