1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
4
; Baseline f32 case: passes %src straight to llvm.amdgcn.rsq.clamp.f32 and
; returns the result.
; Expected code for the tahiti run is a single v_rsq_clamp_f32 instruction.
; Expected code for the tonga run is a plain v_rsq_f32 followed by v_min_f32
; against 0x7f7fffff and v_max_f32 against 0xff7fffff (the bit patterns of the
; largest positive and largest negative finite f32 values).
5define float @v_rsq_clamp_f32(float %src) #0 {
6; SI-LABEL: v_rsq_clamp_f32:
7; SI:       ; %bb.0:
8; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
10; SI-NEXT:    s_setpc_b64 s[30:31]
11;
12; VI-LABEL: v_rsq_clamp_f32:
13; VI:       ; %bb.0:
14; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; VI-NEXT:    v_rsq_f32_e32 v0, v0
16; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
17; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
18; VI-NEXT:    s_setpc_b64 s[30:31]
19  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
20  ret float %rsq_clamp
21}
22
; f32 case with an llvm.fabs.f32 feeding the intrinsic operand.
; Both runs expect no separate fabs instruction: the absolute value shows up as
; the |v0| source modifier on the e64 encoding of the rsq instruction. The
; tonga run additionally expects the same v_min_f32 / v_max_f32 pair as the
; unmodified f32 case.
23define float @v_rsq_clamp_fabs_f32(float %src) #0 {
24; SI-LABEL: v_rsq_clamp_fabs_f32:
25; SI:       ; %bb.0:
26; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; SI-NEXT:    v_rsq_clamp_f32_e64 v0, |v0|
28; SI-NEXT:    s_setpc_b64 s[30:31]
29;
30; VI-LABEL: v_rsq_clamp_fabs_f32:
31; VI:       ; %bb.0:
32; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; VI-NEXT:    v_rsq_f32_e64 v0, |v0|
34; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
35; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
36; VI-NEXT:    s_setpc_b64 s[30:31]
37  %fabs.src = call float @llvm.fabs.f32(float %src)
38  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
39  ret float %rsq_clamp
40}
41
; Baseline f64 case: passes %src straight to llvm.amdgcn.rsq.clamp.f64 and
; returns the result.
; Expected code for the tahiti run is a single v_rsq_clamp_f64 instruction.
; Expected code for the tonga run is v_rsq_f64 followed by v_min_f64 and
; v_max_f64 against 64-bit constants built in s[4:5]: the low half (s4 = -1) is
; written once and shared, while the high half (s5) is first 0x7fefffff and
; then rewritten to 0xffefffff, giving the largest positive and largest
; negative finite f64 bit patterns.
42define double @v_rsq_clamp_f64(double %src) #0 {
43; SI-LABEL: v_rsq_clamp_f64:
44; SI:       ; %bb.0:
45; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
47; SI-NEXT:    s_setpc_b64 s[30:31]
48;
49; VI-LABEL: v_rsq_clamp_f64:
50; VI:       ; %bb.0:
51; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
53; VI-NEXT:    s_mov_b32 s4, -1
54; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
55; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
56; VI-NEXT:    s_mov_b32 s5, 0xffefffff
57; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
58; VI-NEXT:    s_setpc_b64 s[30:31]
59  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
60  ret double %rsq_clamp
61}
62
; f64 case with an llvm.fabs.f64 feeding the intrinsic operand.
; Both runs expect the absolute value to appear as the |v[0:1]| source modifier
; on the e64 encoding of the rsq instruction, with no separate fabs
; instruction. The tonga run additionally expects the same s[4:5]-based
; v_min_f64 / v_max_f64 sequence as the unmodified f64 case.
63define double @v_rsq_clamp_fabs_f64(double %src) #0 {
64; SI-LABEL: v_rsq_clamp_fabs_f64:
65; SI:       ; %bb.0:
66; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; SI-NEXT:    v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
68; SI-NEXT:    s_setpc_b64 s[30:31]
69;
70; VI-LABEL: v_rsq_clamp_fabs_f64:
71; VI:       ; %bb.0:
72; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; VI-NEXT:    v_rsq_f64_e64 v[0:1], |v[0:1]|
74; VI-NEXT:    s_mov_b32 s4, -1
75; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
76; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
77; VI-NEXT:    s_mov_b32 s5, 0xffefffff
78; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
79; VI-NEXT:    s_setpc_b64 s[30:31]
80  %fabs.src = call double @llvm.fabs.f64(double %src)
81  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
82  ret double %rsq_clamp
83}
84
; f32 case with an undef operand (the function takes no arguments).
; The expected code still contains the rsq instruction in both runs; its source
; operand is s4, a register that is not written anywhere earlier in the
; expected sequence. The tonga run keeps the v_min_f32 / v_max_f32 pair.
85define float @v_rsq_clamp_undef_f32() #0 {
86; SI-LABEL: v_rsq_clamp_undef_f32:
87; SI:       ; %bb.0:
88; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; SI-NEXT:    v_rsq_clamp_f32_e32 v0, s4
90; SI-NEXT:    s_setpc_b64 s[30:31]
91;
92; VI-LABEL: v_rsq_clamp_undef_f32:
93; VI:       ; %bb.0:
94; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; VI-NEXT:    v_rsq_f32_e32 v0, s4
96; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
97; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
98; VI-NEXT:    s_setpc_b64 s[30:31]
99  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
100  ret float %rsq_clamp
101}
102
; f64 case with an undef operand (the function takes no arguments).
; The expected code still contains the rsq instruction in both runs; its source
; operand is s[4:5], read before anything in the expected sequence writes it.
; In the tonga run the same s[4:5] pair is afterwards loaded with the clamp
; bounds for v_min_f64 / v_max_f64.
103define double @v_rsq_clamp_undef_f64() #0 {
104; SI-LABEL: v_rsq_clamp_undef_f64:
105; SI:       ; %bb.0:
106; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], s[4:5]
108; SI-NEXT:    s_setpc_b64 s[30:31]
109;
110; VI-LABEL: v_rsq_clamp_undef_f64:
111; VI:       ; %bb.0:
112; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; VI-NEXT:    v_rsq_f64_e32 v[0:1], s[4:5]
114; VI-NEXT:    s_mov_b32 s4, -1
115; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
116; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
117; VI-NEXT:    s_mov_b32 s5, 0xffefffff
118; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
119; VI-NEXT:    s_setpc_b64 s[30:31]
120  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
121  ret double %rsq_clamp
122}
123
; Same IR body as the baseline f32 case, but the function uses attribute
; group #2, which sets "amdgpu-ieee"="false".
; The expected instruction sequence for both runs is identical to the one
; recorded for the baseline f32 function.
124define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
125; SI-LABEL: v_rsq_clamp_f32_non_ieee:
126; SI:       ; %bb.0:
127; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
129; SI-NEXT:    s_setpc_b64 s[30:31]
130;
131; VI-LABEL: v_rsq_clamp_f32_non_ieee:
132; VI:       ; %bb.0:
133; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; VI-NEXT:    v_rsq_f32_e32 v0, v0
135; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
136; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
137; VI-NEXT:    s_setpc_b64 s[30:31]
138  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
139  ret float %rsq_clamp
140}
141
; Same IR body as the baseline f64 case, but the function uses attribute
; group #2, which sets "amdgpu-ieee"="false".
; The expected instruction sequence for both runs is identical to the one
; recorded for the baseline f64 function, including the s[4:5] constant setup
; in the tonga run.
142define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
143; SI-LABEL: v_rsq_clamp_f64_non_ieee:
144; SI:       ; %bb.0:
145; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
147; SI-NEXT:    s_setpc_b64 s[30:31]
148;
149; VI-LABEL: v_rsq_clamp_f64_non_ieee:
150; VI:       ; %bb.0:
151; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
153; VI-NEXT:    s_mov_b32 s4, -1
154; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
155; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
156; VI-NEXT:    s_mov_b32 s5, 0xffefffff
157; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
158; VI-NEXT:    s_setpc_b64 s[30:31]
159  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
160  ret double %rsq_clamp
161}
162
; Intrinsics called by the test functions in this file; all four are declared
; with attribute group #1.
163declare float @llvm.fabs.f32(float) #1
164declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
165declare double @llvm.fabs.f64(double) #1
166declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
167
; Attribute groups:
;   #0 - used by the default test functions.
;   #1 - used by the intrinsic declarations.
;   #2 - like #0 plus "amdgpu-ieee"="false"; used by the *_non_ieee functions.
168attributes #0 = { nounwind }
169attributes #1 = { nounwind readnone }
170attributes #2 = { nounwind "amdgpu-ieee"="false" }
171