1; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
2; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
3; Make sure this doesn't crash with no triple
4
5; NOOP-LABEL: @noop_fdiv_fpmath(
6; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
; No-triple smoke test: the second opt invocation at the top of this file runs
; the pass without -mtriple and expects the fdiv below, including its !fpmath
; attachment, to be printed back unchanged.
; The function uses attribute group #0 (nounwind optnone noinline), which this
; file defines; group #3 is not defined anywhere in this file.
7define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #0 {
  ; Single f32 division tagged with !fpmath !0 (float 2.5).
8  %md.25ulp = fdiv float %a, %b, !fpmath !0
9  store volatile float %md.25ulp, float addrspace(1)* %out
10  ret void
11}
12
13; CHECK-LABEL: @fdiv_fpmath(
14; CHECK: %no.md = fdiv float %a, %b{{$}}
15; CHECK: %md.half.ulp = fdiv float %a, %b
16; CHECK: %md.1ulp = fdiv float %a, %b
17; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
18; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
19; CHECK: %[[FAST_RCP:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %b)
20; CHECK: %fast.md.25ulp = fmul fast float %a, %[[FAST_RCP]]
21; CHECK: %[[AFN_RCP:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %b)
22; CHECK: %afn.md.25ulp = fmul afn float %a, %[[AFN_RCP]]
; Scalar f32 division of %a by %b in a function using attribute group #1.
; Seven variants are emitted: no metadata, !fpmath !1, !2, !0 and !3, and the
; fast / afn flags each combined with !fpmath !0. Every result is written to
; %out with a volatile store.
; Per the expected-output lines preceding this function, the first three stay
; as plain fdiv, %md.25ulp and %md.3ulp become calls to @llvm.amdgcn.fdiv.fast,
; and the fast / afn variants become @llvm.amdgcn.rcp.f32 followed by fmul.
23define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  ; No !fpmath attachment.
24  %no.md = fdiv float %a, %b
25  store volatile float %no.md, float addrspace(1)* %out
26
  ; !1 is float 0.5.
27  %md.half.ulp = fdiv float %a, %b, !fpmath !1
28  store volatile float %md.half.ulp, float addrspace(1)* %out
29
  ; !2 is float 1.0.
30  %md.1ulp = fdiv float %a, %b, !fpmath !2
31  store volatile float %md.1ulp, float addrspace(1)* %out
32
  ; !0 is float 2.5.
33  %md.25ulp = fdiv float %a, %b, !fpmath !0
34  store volatile float %md.25ulp, float addrspace(1)* %out
35
  ; !3 is float 3.0.
36  %md.3ulp = fdiv float %a, %b, !fpmath !3
37  store volatile float %md.3ulp, float addrspace(1)* %out
38
  ; fast flag together with !fpmath !0.
39  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
40  store volatile float %fast.md.25ulp, float addrspace(1)* %out
41
  ; afn flag together with !fpmath !0.
42  %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
43  store volatile float %afn.md.25ulp, float addrspace(1)* %out
44
45  ret void
46}
47
48; CHECK-LABEL: @rcp_fdiv_fpmath(
49; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
50; CHECK: %md.25ulp = call float @llvm.amdgcn.rcp.f32(float %x)
51; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x
52; CHECK: %afn.no.md = call afn float @llvm.amdgcn.rcp.f32(float %x)
53; CHECK: %afn.25ulp = call afn float @llvm.amdgcn.rcp.f32(float %x)
54; CHECK: %fast.no.md = call fast float @llvm.amdgcn.rcp.f32(float %x)
55; CHECK: %fast.25ulp = call fast float @llvm.amdgcn.rcp.f32(float %x)
; Scalar f32 reciprocal cases: every division has the constant 1.0 as its
; numerator and %x as its denominator, in a function using attribute group #1.
; Each result is written to %out with a volatile store.
; Per the expected-output lines preceding this function, %no.md and
; %md.half.ulp stay as fdiv, while %md.25ulp and all afn / fast variants become
; calls to @llvm.amdgcn.rcp.f32 that keep the original fast-math flag.
56define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
  ; No flags, no !fpmath attachment.
57  %no.md = fdiv float 1.0, %x
58  store volatile float %no.md, float addrspace(1)* %out
59
  ; !0 is float 2.5.
60  %md.25ulp = fdiv float 1.0, %x, !fpmath !0
61  store volatile float %md.25ulp, float addrspace(1)* %out
62
  ; !1 is float 0.5.
63  %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
64  store volatile float %md.half.ulp, float addrspace(1)* %out
65
  ; afn flag, with and without !fpmath !0.
66  %afn.no.md = fdiv afn float 1.0, %x
67  store volatile float %afn.no.md, float addrspace(1)* %out
68
69  %afn.25ulp = fdiv afn float 1.0, %x, !fpmath !0
70  store volatile float %afn.25ulp, float addrspace(1)* %out
71
  ; fast flag, with and without !fpmath !0.
72  %fast.no.md = fdiv fast float 1.0, %x
73  store volatile float %fast.no.md, float addrspace(1)* %out
74
75  %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
76  store volatile float %fast.25ulp, float addrspace(1)* %out
77
78  ret void
79}
80
81; CHECK-LABEL: @fdiv_fpmath_vector(
82; CHECK: %[[NO_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
83; CHECK: %[[NO_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
84; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float %[[NO_A0]], %[[NO_B0]]
85; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
86; CHECK: %[[NO_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
87; CHECK: %[[NO_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
88; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float %[[NO_A1]], %[[NO_B1]]
89; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
90; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
91
92; CHECK: %[[HALF_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
93; CHECK: %[[HALF_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
94; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float %[[HALF_A0]], %[[HALF_B0]]
95; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
96; CHECK: %[[HALF_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
97; CHECK: %[[HALF_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
98; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float %[[HALF_A1]], %[[HALF_B1]]
99; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
100; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
101
102; CHECK: %[[ONE_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
103; CHECK: %[[ONE_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
104; CHECK: %[[ONE_FDIV0:[0-9]+]] = fdiv float %[[ONE_A0]], %[[ONE_B0]]
105; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[ONE_FDIV0]], i64 0
106; CHECK: %[[ONE_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
107; CHECK: %[[ONE_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
108; CHECK: %[[ONE_FDIV1:[0-9]+]] = fdiv float %[[ONE_A1]], %[[ONE_B1]]
109; CHECK: %md.1ulp = insertelement <2 x float> %[[ONE_INS0]], float %[[ONE_FDIV1]], i64 1
110; CHECK: store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out
111
112; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
113; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
114; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]])
115; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
116; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
117; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
118; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]])
119; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
; <2 x float> version of the %a / %b cases, in a function using attribute
; group #1. Four divisions are emitted: no metadata, !fpmath !1, !fpmath !2 and
; !fpmath !0; each result is written to %out with a volatile store.
; Per the expected-output lines preceding this function, every division is
; rewritten element by element (extractelement, scalar operation,
; insertelement). The first three keep a scalar fdiv per element; %md.25ulp
; uses a call to @llvm.amdgcn.fdiv.fast per element.
120define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
  ; No !fpmath attachment.
121  %no.md = fdiv <2 x float> %a, %b
122  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
123
  ; !1 is float 0.5.
124  %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
125  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
126
  ; !2 is float 1.0.
127  %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
128  store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out
129
  ; !0 is float 2.5.
130  %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
131  store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out
132
133  ret void
134}
135
136; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
137; CHECK: %[[NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
138; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
139; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
140; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
141; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[NO1]]
142; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
143; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
144
145; CHECK: %[[HALF0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
146; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF0]]
147; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
148; CHECK: %[[HALF1:[0-9]+]] = extractelement <2 x float> %x, i64 1
149; CHECK: %[[HALF_FDIV1:[0-9]+]] =  fdiv float 1.000000e+00, %[[HALF1]]
150; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
151; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
152
153; CHECK: %[[AFN_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
154; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
155; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
156; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
157; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
158; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_FDIV1]], i64 1
159; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
160
161; CHECK: %[[FAST_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
162; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
163; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
164; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
165; CHECK: %[[FAST_NO_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
166; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_RCP1]], i64 1
167; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
168
169; CHECK: %[[AFN_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
170; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
171; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
172; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
173; CHECK: %[[AFN_25_RCP1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
174; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_RCP1]], i64 1
175; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
176
177; CHECK: %[[FAST_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
178; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
179; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
180; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
181; CHECK: %[[FAST_25_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
182; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_RCP1]], i64 1
183; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
; <2 x float> reciprocal cases: the numerator is the splat constant
; <1.0, 1.0> and the denominator is %x, in a function using attribute group #1.
; Each result is written to %out with a volatile store.
; Per the expected-output lines preceding this function, each division is
; rewritten element by element: %no.md and %md.half.ulp keep a scalar fdiv per
; element, while the afn / fast variants use one @llvm.amdgcn.rcp.f32 call per
; element carrying the original fast-math flag.
184define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  ; No flags, no !fpmath attachment.
185  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
186  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
187
  ; !1 is float 0.5.
188  %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
189  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
190
  ; afn and fast flags without metadata.
191  %afn.no.md = fdiv afn <2 x float> <float 1.0, float 1.0>, %x
192  store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
193
194  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
195  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
196
  ; afn and fast flags combined with !fpmath !0 (float 2.5).
197  %afn.25ulp = fdiv afn <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
198  store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
199
200  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
201  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
202
203  ret void
204}
205
206; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
207; CHECK: %[[NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
208; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
209; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
210; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
211; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 2.000000e+00, %[[NO1]]
212; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
213; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
214
215; CHECK: %[[AFN_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
216; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
217; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
218; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
219; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
220; CHECK: %[[AFN_NO_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_NO_FDIV1]]
221; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_MUL1]], i64 1
222; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
223
224; CHECK: %[[FAST_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
225; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
226; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
227; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
228; CHECK: %[[FAST_NO_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
229; CHECK: %[[FAST_NO_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_NO_RCP1]]
230; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_MUL1]], i64 1
231; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
232
233; CHECK: %[[AFN_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
234; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
235; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
236; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
237; CHECK: %[[AFN_25_RCP1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
238; CHECK: %[[AFN_25_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_25_RCP1]]
239; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_MUL1]], i64 1
240; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
241
242; CHECK: %[[FAST_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
243; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
244; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
245; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
246; CHECK: %[[FAST_25_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
247; CHECK: %[[FAST_25_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_25_RCP1]]
248; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_MUL1]], i64 1
249; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
; <2 x float> cases whose numerator is the non-splat constant <1.0, 2.0>, so
; only element 0 is a reciprocal of %x. The function uses attribute group #1
; and writes each result to %out with a volatile store.
; Per the expected-output lines preceding this function, %no.md keeps a scalar
; fdiv per element (1.0 / x0 and 2.0 / x1). For the afn / fast variants,
; element 0 becomes a bare @llvm.amdgcn.rcp.f32 call and element 1 becomes an
; rcp call followed by an fmul with 2.0.
250define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  ; No flags, no !fpmath attachment.
251  %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
252  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
253
  ; afn and fast flags without metadata.
254  %afn.no.md = fdiv afn <2 x float> <float 1.0, float 2.0>, %x
255  store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
256
257  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
258  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
259
  ; afn and fast flags combined with !fpmath !0 (float 2.5).
260  %afn.25ulp = fdiv afn <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
261  store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
262
263  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
264  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
265
266  ret void
267}
268
269; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
270; CHECK: %[[AFN_A0:[0-9]+]] = extractelement <2 x float> %x.insert, i64 0
271; CHECK: %[[AFN_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
272; CHECK: %[[AFN_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B0]])
273; CHECK: %[[AFN_MUL0:[0-9]+]] = fmul afn float %[[AFN_A0]], %[[AFN_RCP0]]
274; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_MUL0]], i64 0
275; CHECK: %[[AFN_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
276; CHECK: %[[AFN_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
277; CHECK: %[[AFN_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B1]])
278; CHECK: %[[AFN_MUL1:[0-9]+]] = fmul afn float %[[AFN_A1]], %[[AFN_RCP1]]
279; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_INS0]], float %[[AFN_MUL1]], i64 1
280; CHECK: store volatile <2 x float> %afn.25ulp
281
282; CHECK: %[[FAST_A0:[0-9]+]] = extractelement <2 x float> %x.insert, i64 0
283; CHECK: %[[FAST_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
284; CHECK: %[[FAST_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B0]])
285; CHECK: %[[FAST_MUL0:[0-9]+]] = fmul fast float %[[FAST_A0]], %[[FAST_RCP0]]
286; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_MUL0]], i64 0
287; CHECK: %[[FAST_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
288; CHECK: %[[FAST_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
289; CHECK: %[[FAST_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B1]])
290; CHECK: %[[FAST_MUL1:[0-9]+]] = fmul fast float %[[FAST_A1]], %[[FAST_RCP1]]
291; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_INS0]], float %[[FAST_MUL1]], i64 1
292; CHECK: store volatile <2 x float> %fast.25ulp
; <2 x float> cases whose numerator is not a constant vector: it is %x with
; 1.0 inserted into element 0. The function uses attribute group #1 and writes
; each result to %out with a volatile store.
; Per the expected-output lines preceding this function, both elements of both
; divisions become an @llvm.amdgcn.rcp.f32 call on the %y element followed by
; an fmul with the element extracted from %x.insert. That includes element 0,
; whose numerator is the inserted 1.0.
293define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
  ; Element 0 of the numerator is the constant 1.0; element 1 comes from %x.
294  %x.insert = insertelement <2 x float> %x, float 1.0, i32 0
295
  ; afn flag combined with !fpmath !0 (float 2.5).
296  %afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0
297  store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
298
  ; fast flag combined with !fpmath !0 (float 2.5).
299  %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
300  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
301
302  ret void
303}
304
305; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
306; CHECK: %no.md = fdiv float %a, %b{{$}}
307; CHECK: %md.half.ulp = fdiv float %a, %b
308; CHECK: %md.1ulp = fdiv float %a, %b
309; CHECK: %md.25ulp = fdiv float %a, %b
310; CHECK: %md.3ulp = fdiv float %a, %b
311; CHECK: %[[RCP_FAST:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %b)
312; CHECK: %fast.md.25ulp = fmul fast float %a, %[[RCP_FAST]]
313; CHECK: %[[RCP_AFN:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %b)
314; CHECK: %afn.md.25ulp  = fmul afn float %a, %[[RCP_AFN]]
; Same seven scalar f32 divisions as @fdiv_fpmath, but in a function using
; attribute group #2 ("denormal-fp-math-f32"="ieee,ieee"). Every result is
; written to %out with a volatile store.
; Per the expected-output lines preceding this function, all five divisions
; without fast-math flags stay as plain fdiv, including %md.25ulp and
; %md.3ulp. The fast / afn variants still become @llvm.amdgcn.rcp.f32 followed
; by fmul.
315define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
  ; No !fpmath attachment.
316  %no.md = fdiv float %a, %b
317  store volatile float %no.md, float addrspace(1)* %out
318
  ; !1 is float 0.5.
319  %md.half.ulp = fdiv float %a, %b, !fpmath !1
320  store volatile float %md.half.ulp, float addrspace(1)* %out
321
  ; !2 is float 1.0.
322  %md.1ulp = fdiv float %a, %b, !fpmath !2
323  store volatile float %md.1ulp, float addrspace(1)* %out
324
  ; !0 is float 2.5.
325  %md.25ulp = fdiv float %a, %b, !fpmath !0
326  store volatile float %md.25ulp, float addrspace(1)* %out
327
  ; !3 is float 3.0.
328  %md.3ulp = fdiv float %a, %b, !fpmath !3
329  store volatile float %md.3ulp, float addrspace(1)* %out
330
  ; fast flag together with !fpmath !0.
331  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
332  store volatile float %fast.md.25ulp, float addrspace(1)* %out
333
  ; afn flag together with !fpmath !0.
334  %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
335  store volatile float %afn.md.25ulp, float addrspace(1)* %out
336
337  ret void
338}
339
; Function attribute groups.
;   #0: nounwind plus optnone and noinline.
;   #1: nounwind with "denormal-fp-math-f32" set to "preserve-sign,preserve-sign".
;   #2: nounwind with "denormal-fp-math-f32" set to "ieee,ieee".
340attributes #0 = { nounwind optnone noinline }
341attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
342attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
343
; Operands for the !fpmath attachments on the fdiv instructions in this file.
; The value names in the tests label them as ULP bounds: !0 = 2.5, !1 = 0.5,
; !2 = 1.0, !3 = 3.0.
344!0 = !{float 2.500000e+00}
345!1 = !{float 5.000000e-01}
346!2 = !{float 1.000000e+00}
347!3 = !{float 3.000000e+00}
348