; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
; RUN:   | FileCheck %s

; Checks the approximate sqrt / reciprocal-sqrt PTX instructions selected when
; precise f32 div/sqrt are disabled on the command line and the functions carry
; the "unsafe-fp-math" attribute (#0), optionally combined with "nvptx-f32ftz"
; (#1) and "reciprocal-estimates" (#2).  Every function is anchored with a
; label directive so each instruction check only matches inside its own
; function body.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; -- reciprocal sqrt --

; CHECK-LABEL: test_rsqrt32
define float @test_rsqrt32(float %a) #0 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_rsqrt_ftz
define float @test_rsqrt_ftz(float %a) #0 #1 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_rsqrt64
define double @test_rsqrt64(double %a) #0 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; CHECK-LABEL: test_rsqrt64_ftz
define double @test_rsqrt64_ftz(double %a) #0 #1 {
; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; -- sqrt --

; CHECK-LABEL: test_sqrt32
define float @test_sqrt32(float %a) #0 {
; CHECK: sqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_sqrt_ftz
define float @test_sqrt_ftz(float %a) #0 #1 {
; CHECK: sqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_sqrt64
define double @test_sqrt64(double %a) #0 {
; There's no sqrt.approx.f64 instruction; we emit
; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
; so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; CHECK-LABEL: test_sqrt64_ftz
define double @test_sqrt64_ftz(double %a) #0 #1 {
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; -- refined sqrt and rsqrt --
;
; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
; by some math.

; CHECK-LABEL: test_rsqrt32_refined
define float @test_rsqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_sqrt32_refined
define float @test_sqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_rsqrt64_refined
define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; CHECK-LABEL: test_sqrt64_refined
define double @test_sqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; -- refined sqrt and rsqrt with ftz enabled --

; CHECK-LABEL: test_rsqrt32_refined_ftz
define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_sqrt32_refined_ftz
define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_rsqrt64_refined_ftz
define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; CHECK-LABEL: test_sqrt64_refined_ftz
define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
; CHECK: rsqrt.approx.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; Attribute groups combined by the tests above.
attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "nvptx-f32ftz" = "true" }
attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }