1; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
2; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
3; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
4; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
5
6;  fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
7
; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; In the denormal configuration the expected sequence is an f16 multiply, a
; conversion to f32 and an f32 add. The chain is closed with the return so
; that no additional instruction can slip in after the add unnoticed.
; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  ; (fadd (fpext (fmul x, y)), z)
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %mul.ext, %z
  ret float %add
}
23
24; f16->f64 is not free.
25; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64:
26; GFX89: v_mul_f16
27; GFX89: v_cvt_f32_f16
28; GFX89: v_cvt_f64_f32
29; GFX89: v_add_f64
; Computes (fadd (fpext (fmul x, y)), z) with an f16 product widened to f64.
define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {
bb:
  %prod = fmul half %x, %y
  %prod.f64 = fpext half %prod to double
  %sum = fadd double %prod.f64, %z
  ret double %sum
}
37
38; f32->f64 is not free.
39; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64:
40; GCN: v_mul_f32
41; GCN: v_cvt_f64_f32
42; GCN: v_add_f64
; Computes (fadd (fpext (fmul x, y)), z) with an f32 product widened to f64.
define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {
bb:
  %prod = fmul float %x, %y
  %prod.f64 = fpext float %prod to double
  %sum = fadd double %prod.f64, %z
  ret double %sum
}
50
51; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
52; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute:
53; GCN: s_waitcnt
54; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
55; GFX9-F32FLUSH-NEXT: s_setpc_b64
56
57; GFX9-F32DENORM-NEXT: v_mul_f16
58; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
59; GFX9-F32DENORM-NEXT: v_add_f32
60; GFX9-F32DENORM-NEXT: s_setpc_b64
; Same expression as the non-commuted variant, but the extended product is the
; right-hand operand of the fadd.
define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {
bb:
  %prod = fmul half %x, %y
  %prod.f32 = fpext half %prod to float
  %sum = fadd float %z, %prod.f32
  ret float %sum
}
68
69; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
70;   -> (fma x, y, (fma (fpext u), (fpext v), z))
71
; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; The end-of-line anchor on the mad_mix check rejects any modifier printed
; after op_sel_hi, matching how the other mad_mix checks in this file are
; written.
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_fma_f32
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  ; (fadd (fmuladd x, y, (fpext (fmul u, v))), z)
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}
92
; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
94;   -> (fma y, z, (fma (fpext u), (fpext v), x))
; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; The end-of-line anchor on the mad_mix check rejects any modifier printed
; after op_sel_hi, matching how the other mad_mix checks in this file are
; written.
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_fma_f32
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  ; (fadd z, (fmuladd x, y, (fpext (fmul u, v))))
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}
115
; Same shape as the fmuladd test, but the multiply-add is spelled as a
; separate fmul/fadd pair that both carry the contract flag. The outermost
; fadd has no contract flag.
; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; The denormal sequence must also cover the final add of %z and the return;
; stopping at the fma would leave the tail of the function unchecked.
; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %mul1 = fmul contract float %x, %y
  %fmad = fadd contract float %mul1, %mul.ext
  %add = fadd float %fmad, %z
  ret float %add
}
135
136; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
137;   -> (fma x, y, (fma (fpext u), (fpext v), z))
138
139; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32:
140; GCN: s_waitcnt
141; GFX89: v_mul_f16
142; GFX89: v_cvt_f32_f16
143; GFX89: v_fma_f32
144; GFX89: v_add_f32
; (fadd (fma x, y, (fpext (fmul contract u, v))), z) using the strict
; llvm.fma intrinsic rather than llvm.fmuladd.
define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
bb:
  %uv = fmul contract half %u, %v
  %uv.f32 = fpext half %uv to float
  %fused = call float @llvm.fma.f32(float %x, float %y, float %uv.f32)
  %sum = fadd float %fused, %z
  ret float %sum
}
153
154; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute:
155; GCN: s_waitcnt
156; GFX89: v_mul_f16
157; GFX89: v_cvt_f32_f16
158; GFX89: v_fma_f32
159; GFX89: v_add_f32
; (fadd z, (fma x, y, (fpext (fmul contract u, v)))): the fma result is the
; right-hand operand of the final fadd.
define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
bb:
  %uv = fmul contract half %u, %v
  %uv.f32 = fpext half %uv to float
  %fused = call float @llvm.fma.f32(float %x, float %y, float %uv.f32)
  %sum = fadd float %z, %fused
  ret float %sum
}
168
; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
170;   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
171
172; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
173; GFX9: v_mul_f16
174; GFX9: v_fma_legacy_f16
175; GFX9: v_cvt_f32_f16
176; GFX9: v_add_f32_e32
; (fadd x, (fpext (fmuladd y, z, (fmul contract u, v)))): the whole f16
; multiply-add is extended before the f32 add.
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
bb:
  %uv = fmul contract half %u, %v
  %acc = call half @llvm.fmuladd.f16(half %y, half %z, half %uv)
  %acc.f32 = fpext half %acc to float
  %sum = fadd float %x, %acc.f32
  ret float %sum
}
185
186; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
187; GFX9: v_mul_f16
188; GFX9: v_fma_legacy_f16
189; GFX9: v_cvt_f32_f16
190; GFX9: v_add_f32_e32
; (fadd x, (fpext (fma y, z, (fmul contract u, v)))) using the strict
; llvm.fma intrinsic on f16.
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
bb:
  %uv = fmul contract half %u, %v
  %acc = call half @llvm.fma.f16(half %y, half %z, half %uv)
  %acc.f32 = fpext half %acc to float
  %sum = fadd float %x, %acc.f32
  ret float %sum
}
199
200; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
201; GFX9: v_mul_f16
202; GFX9: v_fma_legacy_f16
203; GFX9: v_cvt_f32_f16
204; GFX9: v_add_f32_e32
; (fadd (fpext (fma y, z, (fmul contract u, v))), x): the extended fma is the
; left-hand operand of the final fadd.
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
bb:
  %uv = fmul contract half %u, %v
  %acc = call half @llvm.fma.f16(half %y, half %z, half %uv)
  %acc.f32 = fpext half %acc to float
  %sum = fadd float %acc.f32, %x
  ret float %sum
}
213
214; fold (fsub (fpext (fmul x, y)), z)
215;   -> (fma (fpext x), (fpext y), (fneg z))
216
217; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32:
218; GCN: s_waitcnt
219; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}}
220; GFX9-F32FLUSH-NEXT: s_setpc_b64
221
222; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
223; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
224; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
225; GFX9-F32DENORM-NEXT: s_setpc_b64
; (fsub (fpext (fmul x, y)), z)
define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
bb:
  %prod = fmul half %x, %y
  %prod.f32 = fpext half %prod to float
  %diff = fsub float %prod.f32, %z
  ret float %diff
}
233
234; fold (fsub x, (fpext (fmul y, z)))
235;   -> (fma (fneg (fpext y)), (fpext z), x)
236
; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; The end-of-line anchor on the mad_mix check rejects any modifier printed
; after op_sel_hi, matching the other fsub mad_mix checks in this file.
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32
; GFX9-F32DENORM-NEXT: v_sub_f32_e32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {
entry:
  ; (fsub contract x, (fpext (fmul contract y, z)))
  %mul = fmul contract half %y, %z
  %mul.ext = fpext half %mul to float
  %add = fsub contract float %x, %mul.ext
  ret float %add
}
253
; fold (fsub (fpext (fneg (fmul x, y))), z)
255;   -> (fneg (fma (fpext x), (fpext y), z))
256
257; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32:
258; GCN: s_waitcnt
259; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
260; GFX9-F32FLUSH-NEXT: s_setpc_b64
261
262; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
263; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
264; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
265; GFX9-F32DENORM-NEXT: s_setpc_b64
; (fsub (fpext (fneg (fmul x, y))), z): the negation is applied to the f16
; product, before the extension. The negation is written as a subtraction
; from -0.0.
define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
bb:
  %prod = fmul half %x, %y
  %prod.neg = fsub half -0.0, %prod
  %prod.neg.f32 = fpext half %prod.neg to float
  %diff = fsub float %prod.neg.f32, %z
  ret float %diff
}
274
; fold (fsub (fneg (fpext (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))
277
278; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32:
279; GCN: s_waitcnt
280; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
281; GFX9-F32FLUSH-NEXT: s_setpc_b64
282
283; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
284; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
285; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
286; GFX9-F32DENORM-NEXT: s_setpc_b64
; (fsub (fneg (fpext (fmul x, y))), z): the negation is applied to the
; extended f32 value, after the extension. The negation is written as a
; subtraction from -0.0.
define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
bb:
  %prod = fmul half %x, %y
  %prod.f32 = fpext half %prod to float
  %prod.f32.neg = fsub float -0.0, %prod.f32
  %diff = fsub float %prod.f32.neg, %z
  ret float %diff
}
295
296; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
;    -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
298; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32:
299; GCN: s_waitcnt
300; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}}
301; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
302; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
303; GFX9-F32FLUSH-NEXT: s_setpc_b64
304
305; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
306; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
307; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3
308; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
309; GFX9-F32DENORM-NEXT: s_setpc_b64
; (fsub (fmuladd x, y, (fpext (fmul u, v))), z)
define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
bb:
  %uv = fmul half %u, %v
  %uv.f32 = fpext half %uv to float
  %acc = call float @llvm.fmuladd.f32(float %x, float %y, float %uv.f32)
  %diff = fsub float %acc, %z
  ret float %diff
}
318
319;  fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
320;    -> (fmad (fpext x), (fpext y),
321;            (fmad (fpext u), (fpext v), (fneg z)))
322
323; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
324; GFX9: v_mul_f16
325; GFX9: v_fma_legacy_f16
326; GFX9: v_cvt_f32_f16
327; GFX9: v_sub_f32
328; GCN: s_setpc_b64
; (fsub (fpext (fmuladd x, y, (fmul u, v))), z): the f16 multiply-add is
; extended as a whole before the f32 subtraction.
define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {
bb:
  %uv = fmul half %u, %v
  %acc = call half @llvm.fmuladd.f16(half %x, half %y, half %uv)
  %acc.f32 = fpext half %acc to float
  %diff = fsub float %acc.f32, %z
  ret float %diff
}
337
338; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
339;   -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
340; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute:
341; GCN: s_waitcnt
342; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}}
343; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}}
344; GFX9-F32FLUSH-NEXT: s_setpc_b64
345
346; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
347; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
348; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
349; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1
350; GFX9-F32DENORM-NEXT: s_setpc_b64
; (fsub x, (fmuladd y, z, (fpext (fmul u, v)))): the multiply-add is the
; subtrahend.
define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
bb:
  %uv = fmul half %u, %v
  %uv.f32 = fpext half %uv to float
  %acc = call float @llvm.fmuladd.f32(float %y, float %z, float %uv.f32)
  %diff = fsub float %x, %acc
  ret float %diff
}
359
360; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
361;    -> (fma (fneg (fpext y)), (fpext z),
362;            (fma (fneg (fpext u)), (fpext v), x))
363; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
364; GCN: s_waitcnt
365; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
366; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
367; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
368; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
369; GFX9-NEXT: s_setpc_b64
; (fsub x, (fpext (fmuladd y, z, (fmul u, v)))): the extended f16
; multiply-add is the subtrahend.
define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
bb:
  %uv = fmul half %u, %v
  %acc = call half @llvm.fmuladd.f16(half %y, half %z, half %uv)
  %acc.f32 = fpext half %acc to float
  %diff = fsub float %x, %acc.f32
  ret float %diff
}
378
; Multiply-add intrinsics, grouped by element type.
declare half @llvm.fmuladd.f16(half, half, half) #0
declare half @llvm.fma.f16(half, half, half) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fma.f32(float, float, float) #0

; Attribute group referenced as #0 by the definitions and declarations.
attributes #0 = { nounwind readnone speculatable }
385