1; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
2; RUN:   | FileCheck %s
3
; Data layout used for this test module; it declares 32-bit pointers (p:32:32:32).
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

; Square-root intrinsics exercised by every function below.
declare double @llvm.sqrt.f64(double)
declare float @llvm.sqrt.f32(float)
8
9; -- reciprocal sqrt --
10
; f32 1.0 / sqrt(a) with unsafe-fp-math (#0): the output for this function is
; expected to contain the approximate reciprocal-sqrt instruction.
; CHECK-LABEL: test_rsqrt32
define float @test_rsqrt32(float %a) #0 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}
18
; f32 1.0 / sqrt(a) with unsafe-fp-math (#0) and nvptx-f32ftz (#1): the
; flush-to-zero form of the approximate reciprocal-sqrt is expected.
; CHECK-LABEL: test_rsqrt_ftz
define float @test_rsqrt_ftz(float %a) #0 #1 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}
26
; f64 1.0 / sqrt(a) with unsafe-fp-math (#0): the f64 approximate
; reciprocal-sqrt instruction is expected.
; CHECK-LABEL: test_rsqrt64
define double @test_rsqrt64(double %a) #0 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}
34
; f64 1.0 / sqrt(a) with unsafe-fp-math (#0) and nvptx-f32ftz (#1).
; CHECK-LABEL: test_rsqrt64_ftz
define double @test_rsqrt64_ftz(double %a) #0 #1 {
; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}
43
44; -- sqrt --
45
; f32 sqrt(a) with unsafe-fp-math (#0): the approximate sqrt instruction is
; expected.
; CHECK-LABEL: test_sqrt32
define float @test_sqrt32(float %a) #0 {
; CHECK: sqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}
52
; f32 sqrt(a) with unsafe-fp-math (#0) and nvptx-f32ftz (#1): the
; flush-to-zero form of the approximate sqrt is expected.
; CHECK-LABEL: test_sqrt_ftz
define float @test_sqrt_ftz(float %a) #0 #1 {
; CHECK: sqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}
59
; f64 sqrt(a) with unsafe-fp-math (#0).
; CHECK-LABEL: test_sqrt64
define double @test_sqrt64(double %a) #0 {
; There's no sqrt.approx.f64 instruction; we emit
; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
; so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}
70
; f64 sqrt(a) with unsafe-fp-math (#0) and nvptx-f32ftz (#1).
; CHECK-LABEL: test_sqrt64_ftz
define double @test_sqrt64_ftz(double %a) #0 #1 {
; There's no sqrt.approx.ftz.f64 instruction, so the expected sequence is the
; same rsqrt.approx.f64 + rcp.approx.ftz.f64 pair as in the non-ftz f64 sqrt
; case.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}
79
80; -- refined sqrt and rsqrt --
81;
82; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
83; by some math.
84
; f32 reciprocal sqrt with unsafe-fp-math (#0) and reciprocal-estimates (#2):
; only the initial rsqrt.approx.f32 estimate is checked, not the math after it.
; CHECK-LABEL: test_rsqrt32_refined
define float @test_rsqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %root = tail call float @llvm.sqrt.f32(float %a)
  %recip = fdiv float 1.0, %root
  ret float %recip
}
92
; f32 sqrt with unsafe-fp-math (#0) and reciprocal-estimates (#2): the check
; expects an rsqrt.approx.f32 estimate even though the IR computes a plain sqrt.
; CHECK-LABEL: test_sqrt32_refined
define float @test_sqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %root = tail call float @llvm.sqrt.f32(float %a)
  ret float %root
}
99
; f64 reciprocal sqrt with unsafe-fp-math (#0) and reciprocal-estimates (#2):
; only the initial rsqrt.approx.f64 estimate is checked.
; CHECK-LABEL: test_rsqrt64_refined
define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %root = tail call double @llvm.sqrt.f64(double %a)
  %recip = fdiv double 1.0, %root
  ret double %recip
}
107
; f64 sqrt with unsafe-fp-math (#0) and reciprocal-estimates (#2): the check
; expects an rsqrt.approx.f64 estimate.
; CHECK-LABEL: test_sqrt64_refined
define double @test_sqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %root = tail call double @llvm.sqrt.f64(double %a)
  ret double %root
}
114
115; -- refined sqrt and rsqrt with ftz enabled --
116
; f32 reciprocal sqrt with unsafe-fp-math (#0), nvptx-f32ftz (#1) and
; reciprocal-estimates (#2): the initial estimate must be the ftz form.
; CHECK-LABEL: test_rsqrt32_refined_ftz
define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %root = tail call float @llvm.sqrt.f32(float %a)
  %recip = fdiv float 1.0, %root
  ret float %recip
}
124
; f32 sqrt with unsafe-fp-math (#0), nvptx-f32ftz (#1) and
; reciprocal-estimates (#2): the check expects an ftz rsqrt estimate.
; CHECK-LABEL: test_sqrt32_refined_ftz
define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %root = tail call float @llvm.sqrt.f32(float %a)
  ret float %root
}
131
; f64 reciprocal sqrt with unsafe-fp-math (#0), nvptx-f32ftz (#1) and
; reciprocal-estimates (#2).
; CHECK-LABEL: test_rsqrt64_refined_ftz
define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; rsqrt.approx.ftz.f64 does not exist, so the non-ftz estimate is expected.
; CHECK: rsqrt.approx.f64
  %root = tail call double @llvm.sqrt.f64(double %a)
  %recip = fdiv double 1.0, %root
  ret double %recip
}
140
; f64 sqrt with unsafe-fp-math (#0), nvptx-f32ftz (#1) and
; reciprocal-estimates (#2): the check expects the non-ftz f64 rsqrt estimate.
; CHECK-LABEL: test_sqrt64_refined_ftz
define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
; CHECK: rsqrt.approx.f64
  %root = tail call double @llvm.sqrt.f64(double %a)
  ret double %root
}
147
; #0: applied to every test function in this file.
attributes #0 = { "unsafe-fp-math"="true" }
; #1: applied to the *_ftz variants.
attributes #1 = { "nvptx-f32ftz"="true" }
; #2: applied to the *_refined variants; covers f32/f64 sqrt and rsqrt.
attributes #2 = { "reciprocal-estimates"="rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }
151