; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
; The second invocation omits -mtriple to make sure the pass doesn't crash
; with no triple.

; Scalar 2.5 ulp division that is only verified under the NOOP prefix, i.e.
; by the opt invocation that passes no -mtriple. The fdiv is expected to come
; out unchanged, with its !fpmath metadata still attached.
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
; Attribute group #1 (nounwind) is the plain group shared by the other
; kernels in this file; it carries no optnone, so the pass is not skipped
; for this function on account of its attributes.
define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  ; Volatile store of the quotient to %out.
  store volatile float %md.25ulp, float addrspace(1)* %out
  ret void
}

; Scalar float division a / b under every accuracy / fast-math combination
; exercised by this file. Expected output, per the directives below:
;   - no !fpmath, 0.5 ulp and 1.0 ulp: left as a plain fdiv;
;   - 2.5 ulp and 3.0 ulp: replaced by a call to @llvm.amdgcn.fdiv.fast that
;     keeps the original !fpmath node;
;   - fast / arcp flagged fdiv with 2.5 ulp: left as fdiv with its flags.
; CHECK-LABEL: @fdiv_fpmath(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  ; No accuracy metadata at all.
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  ; !1 = 0.5 ulp.
  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  ; !2 = 1.0 ulp.
  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  ; !0 = 2.5 ulp.
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  ; !3 = 3.0 ulp.
  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  ; 2.5 ulp combined with the fast flag.
  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  ; 2.5 ulp combined with the arcp flag.
  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out

  ret void
}

; Scalar reciprocal pattern 1.0 / x. Per the directives below, every variant
; (no metadata, 2.5 ulp, 0.5 ulp, and the arcp / fast flagged forms with and
; without 2.5 ulp metadata) is expected to remain an fdiv.
; CHECK-LABEL: @rcp_fdiv_fpmath(
; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
  ; No accuracy metadata.
  %no.md = fdiv float 1.0, %x
  store volatile float %no.md, float addrspace(1)* %out

  ; !0 = 2.5 ulp.
  %md.25ulp = fdiv float 1.0, %x, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  ; !1 = 0.5 ulp.
  %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  ; arcp flag, no metadata.
  %arcp.no.md = fdiv arcp float 1.0, %x
  store volatile float %arcp.no.md, float addrspace(1)* %out

  ; arcp flag with 2.5 ulp.
  %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
  store volatile float %arcp.25ulp, float addrspace(1)* %out

  ; fast flag, no metadata.
  %fast.no.md = fdiv fast float 1.0, %x
  store volatile float %fast.no.md, float addrspace(1)* %out

  ; fast flag with 2.5 ulp.
  %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
  store volatile float %fast.25ulp, float addrspace(1)* %out

  ret void
}

; <2 x float> version of the a / b test. Per the directives below, the
; divisions with no metadata, 0.5 ulp and 1.0 ulp stay as vector fdiv, while
; the 2.5 ulp division is expected to be split per element: extractelement of
; both operands, a scalar call to @llvm.amdgcn.fdiv.fast carrying !fpmath !0,
; and insertelement to rebuild the vector (starting from undef).
; CHECK-LABEL: @fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2

; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
  ; No accuracy metadata.
  %no.md = fdiv <2 x float> %a, %b
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  ; !1 = 0.5 ulp.
  %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  ; !2 = 1.0 ulp.
  %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
  store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

  ; !0 = 2.5 ulp; the only division here expected to be rewritten.
  %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
  store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; <2 x float> reciprocal with a splat <1.0, 1.0> numerator. Per the
; directives below, every variant is expected to remain a vector fdiv, and
; the final volatile store of %fast.25ulp is expected to survive as written.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  ; No accuracy metadata.
  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  ; !1 = 0.5 ulp.
  %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  ; arcp flag, no metadata.
  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  ; fast flag, no metadata.
  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  ; arcp flag with 2.5 ulp.
  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  ; fast flag with 2.5 ulp.
  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; <2 x float> division whose constant numerator is <1.0, 2.0>, i.e. only one
; lane is 1.0. Per the directives below, every variant is expected to remain
; a vector fdiv. The divisions written without !fpmath are anchored with
; {{$}} so that the match fails if any trailing metadata shows up on them.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  ; No accuracy metadata.
  %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  ; arcp flag, no metadata.
  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  ; fast flag, no metadata.
  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  ; arcp flag with 2.5 ulp.
  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  ; fast flag with 2.5 ulp.
  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Numerator is a non-constant vector whose lane 0 is overwritten with 1.0 via
; insertelement. Per the directives below, both the arcp and the fast 2.5 ulp
; divisions are currently expected to remain whole-vector fdiv instructions.
; FIXME: Should be able to get fdiv for 1.0 component
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %arcp.25ulp

; CHECK: %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
  ; Lane 0 becomes the constant 1.0; lane 1 keeps the incoming value of %x.
  %x.insert = insertelement <2 x float> %x, float 1.0, i32 0

  ; arcp flag with 2.5 ulp.
  %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  ; fast flag with 2.5 ulp.
  %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Same seven divisions as @fdiv_fpmath, but the kernel uses attribute group
; #2, which sets the +fp32-denormals target feature. Per the directives
; below, every division is expected to remain an fdiv here, including the
; 2.5 ulp and 3.0 ulp ones.
; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
  ; No accuracy metadata.
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  ; !1 = 0.5 ulp.
  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  ; !2 = 1.0 ulp.
  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  ; !0 = 2.5 ulp.
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  ; !3 = 3.0 ulp.
  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  ; 2.5 ulp combined with the fast flag.
  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  ; 2.5 ulp combined with the arcp flag.
  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out

  ret void
}

; Attribute groups.
;   #0: nounwind + optnone + noinline; not referenced by any kernel visible
;       in this file.
;   #1: plain nounwind group used by most kernels.
;   #2: nounwind plus the +fp32-denormals target feature; used by
;       @fdiv_fpmath_f32_denormals.
attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="+fp32-denormals" }

; The metadata nodes are expected to be printed back with the same numbering
; and values.
; CHECK: !0 = !{float 2.500000e+00}
; CHECK: !1 = !{float 5.000000e-01}
; CHECK: !2 = !{float 1.000000e+00}
; CHECK: !3 = !{float 3.000000e+00}

; !fpmath operands used by the divisions above: 2.5, 0.5, 1.0 and 3.0
; (the value names in the tests refer to these as ulp bounds).
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}
