; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Instruction-selection test for the llvm.amdgcn.rsq intrinsic on f32 and f64
; operands. Both llc invocations above (default subtarget and tonga with
; flat-for-global disabled) are verified against the same set of checks.

declare float @llvm.amdgcn.rsq.f32(float) #0
declare double @llvm.amdgcn.rsq.f64(double) #0

; A float kernel argument is expected to be consumed by v_rsq_f32 straight
; from a scalar register.
; FUNC-LABEL: {{^}}rsq_f32:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @rsq_f32(float addrspace(1)* %out, float %src) #1 {
  %rsq = call float @llvm.amdgcn.rsq.f32(float %src) #0
  store float %rsq, float addrspace(1)* %out, align 4
  ret void
}

; TODO: Really these should be constant folded
; The constant 4.0 is expected to appear directly as the source operand.
; FUNC-LABEL: {{^}}rsq_f32_constant_4.0:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
define amdgpu_kernel void @rsq_f32_constant_4.0(float addrspace(1)* %out) #1 {
  %rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) #0
  store float %rsq, float addrspace(1)* %out, align 4
  ret void
}

; The constant 100.0 is expected to appear as the 32-bit literal 0x42c80000,
; the IEEE-754 single-precision bit pattern of 100.0.
; FUNC-LABEL: {{^}}rsq_f32_constant_100.0:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
define amdgpu_kernel void @rsq_f32_constant_100.0(float addrspace(1)* %out) #1 {
  %rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) #0
  store float %rsq, float addrspace(1)* %out, align 4
  ret void
}

; A double kernel argument is expected to be consumed by v_rsq_f64 straight
; from a scalar register pair.
; FUNC-LABEL: {{^}}rsq_f64:
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @rsq_f64(double addrspace(1)* %out, double %src) #1 {
  %rsq = call double @llvm.amdgcn.rsq.f64(double %src) #0
  store double %rsq, double addrspace(1)* %out, align 4
  ret void
}

; TODO: Really these should be constant folded
; The constant 4.0 is expected to appear directly as the source operand.
; FUNC-LABEL: {{^}}rsq_f64_constant_4.0:
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 4.0
define amdgpu_kernel void @rsq_f64_constant_4.0(double addrspace(1)* %out) #1 {
  %rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) #0
  store double %rsq, double addrspace(1)* %out, align 4
  ret void
}

; The double constant 100.0 (bit pattern 0x4059000000000000) is expected to be
; materialized as two 32-bit scalar moves, 0x40590000 and 0, in either order,
; and then read by v_rsq_f64 as a scalar register pair.
; FUNC-LABEL: {{^}}rsq_f64_constant_100.0:
; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0x40590000
; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 {
  %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0
  store double %rsq, double addrspace(1)* %out, align 4
  ret void
}

; With an undef operand no v_rsq_f32 instruction is expected in the output.
; FUNC-LABEL: {{^}}rsq_undef_f32:
; SI-NOT: v_rsq_f32
define amdgpu_kernel void @rsq_undef_f32(float addrspace(1)* %out) #1 {
  %rsq = call float @llvm.amdgcn.rsq.f32(float undef)
  store float %rsq, float addrspace(1)* %out, align 4
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }