; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s

; Cost-model test for the llvm.fabs intrinsic on the amdgcn-unknown-amdhsa
; triple. Each kernel loads a value, calls llvm.fabs on it and stores the
; result; the test expects the intrinsic call to be reported with an
; estimated cost of 0 for every scalar and vector type exercised below.
;
; The quoted function name is matched with a label directive so that each
; cost line is only searched for inside the output of its own function and a
; failure is reported against the right function.

; --- 32-bit float: scalar, <2 x>, <3 x> ---

; CHECK-LABEL: 'fabs_f32'
; CHECK: estimated cost of 0 for {{.*}} call float @llvm.fabs.f32
define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 {
  %vec = load float, float addrspace(1)* %vaddr
  %fabs = call float @llvm.fabs.f32(float %vec) #1
  store float %fabs, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: 'fabs_v2f32'
; CHECK: estimated cost of 0 for {{.*}} call <2 x float> @llvm.fabs.v2f32
define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 {
  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %vec) #1
  store <2 x float> %fabs, <2 x float> addrspace(1)* %out
  ret void
}

; CHECK-LABEL: 'fabs_v3f32'
; CHECK: estimated cost of 0 for {{.*}} call <3 x float> @llvm.fabs.v3f32
define amdgpu_kernel void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 {
  %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
  %fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %vec) #1
  store <3 x float> %fabs, <3 x float> addrspace(1)* %out
  ret void
}

; --- 64-bit double: scalar, <2 x>, <3 x> ---

; CHECK-LABEL: 'fabs_f64'
; CHECK: estimated cost of 0 for {{.*}} call double @llvm.fabs.f64
define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 {
  %vec = load double, double addrspace(1)* %vaddr
  %fabs = call double @llvm.fabs.f64(double %vec) #1
  store double %fabs, double addrspace(1)* %out
  ret void
}

; CHECK-LABEL: 'fabs_v2f64'
; CHECK: estimated cost of 0 for {{.*}} call <2 x double> @llvm.fabs.v2f64
define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 {
  %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %vec) #1
  store <2 x double> %fabs, <2 x double> addrspace(1)* %out
  ret void
}

; CHECK-LABEL: 'fabs_v3f64'
; CHECK: estimated cost of 0 for {{.*}} call <3 x double> @llvm.fabs.v3f64
define amdgpu_kernel void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 {
  %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
  %fabs = call <3 x double> @llvm.fabs.v3f64(<3 x double> %vec) #1
  store <3 x double> %fabs, <3 x double> addrspace(1)* %out
  ret void
}

; --- 16-bit half: scalar, <2 x>, <3 x> ---

; CHECK-LABEL: 'fabs_f16'
; CHECK: estimated cost of 0 for {{.*}} call half @llvm.fabs.f16
define amdgpu_kernel void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 {
  %vec = load half, half addrspace(1)* %vaddr
  %fabs = call half @llvm.fabs.f16(half %vec) #1
  store half %fabs, half addrspace(1)* %out
  ret void
}

; CHECK-LABEL: 'fabs_v2f16'
; CHECK: estimated cost of 0 for {{.*}} call <2 x half> @llvm.fabs.v2f16
define amdgpu_kernel void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 {
  %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %vec) #1
  store <2 x half> %fabs, <2 x half> addrspace(1)* %out
  ret void
}

; CHECK-LABEL: 'fabs_v3f16'
; CHECK: estimated cost of 0 for {{.*}} call <3 x half> @llvm.fabs.v3f16
define amdgpu_kernel void @fabs_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr) #0 {
  %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr
  %fabs = call <3 x half> @llvm.fabs.v3f16(<3 x half> %vec) #1
  store <3 x half> %fabs, <3 x half> addrspace(1)* %out
  ret void
}

; Declarations of the fabs intrinsics called by the kernels, grouped by
; element type.
declare float @llvm.fabs.f32(float) #1
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1
declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1

declare double @llvm.fabs.f64(double) #1
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1
declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1

declare half @llvm.fabs.f16(half) #1
; Vector half-precision fabs intrinsic declarations.
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
declare <3 x half> @llvm.fabs.v3f16(<3 x half>) #1

; Attribute groups referenced throughout this file:
;   #0 - attached to the kernel definitions.
;   #1 - attached to the llvm.fabs calls and declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }