1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
3; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
4
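5; Check that fp16 fused multiply-accumulate instructions (vfma/vfms/vfnma/vfnms) are generated, and that with +slowfpvfmx separate fmul/fadd/fsub pairs stay unfused while explicit llvm.fma calls are still selected as fused instructions.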
6
; fusedMACTest2: an fmul whose result feeds an fadd, i.e. (*%a1 * *%a2) + *%a3,
; with the sum stored back through %a1.
; The default run expects a single vfma.f16; the +slowfpvfmx run expects the
; pair to stay split as vmul.f16 followed by vadd.f16.
7define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
8; CHECK-LABEL: fusedMACTest2:
9; CHECK:       @ %bb.0:
10; CHECK-NEXT:    vldr.16 s0, [r1]
11; CHECK-NEXT:    vldr.16 s2, [r0]
12; CHECK-NEXT:    vldr.16 s4, [r2]
13; CHECK-NEXT:    vfma.f16 s4, s2, s0
14; CHECK-NEXT:    vstr.16 s4, [r0]
15; CHECK-NEXT:    bx lr
16;
17; DONT-FUSE-LABEL: fusedMACTest2:
18; DONT-FUSE:       @ %bb.0:
19; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
20; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
21; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
22; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
23; DONT-FUSE-NEXT:    vadd.f16 s0, s0, s2
24; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
25; DONT-FUSE-NEXT:    bx lr
26
27  %f1 = load half, half *%a1, align 2
28  %f2 = load half, half *%a2, align 2
29  %f3 = load half, half *%a3, align 2
  ; Separate multiply and add: the pattern whose fusion is being checked.
30  %1 = fmul half %f1, %f2
31  %2 = fadd half %1, %f3
32  store half %2, half *%a1, align 2
33  ret void
34}
35
; fusedMACTest4: an fmul whose result is subtracted from a loaded value,
; i.e. *%a1 - (*%a2 * *%a3), with the result stored back through %a1.
; The default run expects a single vfms.f16; the +slowfpvfmx run expects
; vmul.f16 followed by vsub.f16.
36define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
37; CHECK-LABEL: fusedMACTest4:
38; CHECK:       @ %bb.0:
39; CHECK-NEXT:    vldr.16 s0, [r2]
40; CHECK-NEXT:    vldr.16 s2, [r1]
41; CHECK-NEXT:    vldr.16 s4, [r0]
42; CHECK-NEXT:    vfms.f16 s4, s2, s0
43; CHECK-NEXT:    vstr.16 s4, [r0]
44; CHECK-NEXT:    bx lr
45;
46; DONT-FUSE-LABEL: fusedMACTest4:
47; DONT-FUSE:       @ %bb.0:
48; DONT-FUSE-NEXT:    vldr.16 s0, [r2]
49; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
50; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
51; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
52; DONT-FUSE-NEXT:    vsub.f16 s0, s2, s0
53; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
54; DONT-FUSE-NEXT:    bx lr
55
56  %f1 = load half, half *%a1, align 2
57  %f2 = load half, half *%a2, align 2
58  %f3 = load half, half *%a3, align 2
  ; The product is the subtrahend here (%f1 - product), unlike fusedMACTest8.
59  %1 = fmul half %f2, %f3
60  %2 = fsub half %f1, %1
61  store half %2, half *%a1, align 2
62  ret void
63}
64
; fusedMACTest6: negated product minus a third value,
; i.e. (-0.0 - (*%a1 * *%a2)) - *%a3, with the result stored through %a1.
; The default run expects a single vfnma.f16; the +slowfpvfmx run expects
; vnmul.f16 followed by vsub.f16.
65define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
66; CHECK-LABEL: fusedMACTest6:
67; CHECK:       @ %bb.0:
68; CHECK-NEXT:    vldr.16 s0, [r1]
69; CHECK-NEXT:    vldr.16 s2, [r0]
70; CHECK-NEXT:    vldr.16 s4, [r2]
71; CHECK-NEXT:    vfnma.f16 s4, s2, s0
72; CHECK-NEXT:    vstr.16 s4, [r0]
73; CHECK-NEXT:    bx lr
74;
75; DONT-FUSE-LABEL: fusedMACTest6:
76; DONT-FUSE:       @ %bb.0:
77; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
78; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
79; DONT-FUSE-NEXT:    vnmul.f16 s0, s2, s0
80; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
81; DONT-FUSE-NEXT:    vsub.f16 s0, s0, s2
82; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
83; DONT-FUSE-NEXT:    bx lr
84
85  %f1 = load half, half *%a1, align 2
86  %f2 = load half, half *%a2, align 2
87  %f3 = load half, half *%a3, align 2
88  %1 = fmul half %f1, %f2
  ; Negation is written as a subtraction from -0.0.
89  %2 = fsub half -0.0, %1
90  %3 = fsub half %2, %f3
91  store half %3, half *%a1, align 2
92  ret void
93}
94
; fusedMACTest8: an fmul whose result has a third value subtracted from it,
; i.e. (*%a1 * *%a2) - *%a3, with the result stored back through %a1.
; The default run expects a single vfnms.f16; the +slowfpvfmx run expects
; vmul.f16 followed by vsub.f16.
95define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
96; CHECK-LABEL: fusedMACTest8:
97; CHECK:       @ %bb.0:
98; CHECK-NEXT:    vldr.16 s0, [r1]
99; CHECK-NEXT:    vldr.16 s2, [r0]
100; CHECK-NEXT:    vldr.16 s4, [r2]
101; CHECK-NEXT:    vfnms.f16 s4, s2, s0
102; CHECK-NEXT:    vstr.16 s4, [r0]
103; CHECK-NEXT:    bx lr
104;
105; DONT-FUSE-LABEL: fusedMACTest8:
106; DONT-FUSE:       @ %bb.0:
107; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
108; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
109; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
110; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
111; DONT-FUSE-NEXT:    vsub.f16 s0, s0, s2
112; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
113; DONT-FUSE-NEXT:    bx lr
114
115  %f1 = load half, half *%a1, align 2
116  %f2 = load half, half *%a2, align 2
117  %f3 = load half, half *%a3, align 2
  ; The product is the minuend here (product - %f3), unlike fusedMACTest4.
118  %1 = fmul half %f1, %f2
119  %2 = fsub half %1, %f3
120  store half %2, half *%a1, align 2
121  ret void
122}
123
; test_fma_f16: a direct call to the llvm.fma.f16 intrinsic on three loaded
; halves, with the result stored back through %aa.
; Both run configurations expect vfma.f16, i.e. +slowfpvfmx does not split an
; explicit fma call.
; NOTE(review): the function is marked readnone although its body loads from
; and stores through its pointer arguments - confirm the attribute is intended.
124define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
125; CHECK-LABEL: test_fma_f16:
126; CHECK:       @ %bb.0: @ %entry
127; CHECK-NEXT:    vldr.16 s0, [r1]
128; CHECK-NEXT:    vldr.16 s2, [r0]
129; CHECK-NEXT:    vldr.16 s4, [r2]
130; CHECK-NEXT:    vfma.f16 s4, s2, s0
131; CHECK-NEXT:    vstr.16 s4, [r0]
132; CHECK-NEXT:    bx lr
133;
134; DONT-FUSE-LABEL: test_fma_f16:
135; DONT-FUSE:       @ %bb.0: @ %entry
136; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
137; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
138; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
139; DONT-FUSE-NEXT:    vfma.f16 s4, s2, s0
140; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
141; DONT-FUSE-NEXT:    bx lr
; The explicit block label is what yields the "@ %entry" suffix in the
; expected basic-block comment lines above.
142entry:
143  %a = load half, half *%aa, align 2
144  %b = load half, half *%bb, align 2
145  %c = load half, half *%cc, align 2
146  %tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
147  store half %tmp1, half *%aa, align 2
148  ret void
149}
150
; test_fnms_f16: llvm.fma.f16 with a negated addend, i.e. fma(a, b, -c), with
; the result stored back through %aa.
; Both run configurations expect vfnms.f16 (same IR shape and expected output
; as fnms1 in this file).
; Previously the negated value %tmp2 was computed but never used (the fma call
; took %c directly), so this test merely duplicated test_fma_f16 and asserted
; vfma.f16. The assertions below were updated by hand to match; regenerate
; them with utils/update_llc_test_checks.py to confirm.
; NOTE(review): the function is marked readnone although its body loads from
; and stores through its pointer arguments - confirm the attribute is intended.
151define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
152; CHECK-LABEL: test_fnms_f16:
153; CHECK:       @ %bb.0:
154; CHECK-NEXT:    vldr.16 s0, [r1]
155; CHECK-NEXT:    vldr.16 s2, [r0]
156; CHECK-NEXT:    vldr.16 s4, [r2]
157; CHECK-NEXT:    vfnms.f16 s4, s2, s0
158; CHECK-NEXT:    vstr.16 s4, [r0]
159; CHECK-NEXT:    bx lr
160;
161; DONT-FUSE-LABEL: test_fnms_f16:
162; DONT-FUSE:       @ %bb.0:
163; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
164; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
165; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
166; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
167; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
168; DONT-FUSE-NEXT:    bx lr
169
170  %a = load half, half *%aa, align 2
171  %b = load half, half *%bb, align 2
172  %c = load half, half *%cc, align 2
  ; Negate the addend (written as a subtraction from -0.0) and feed the
  ; negated value, not %c, into the fma.
173  %tmp2 = fsub half -0.0, %c
174  %tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
175  store half %tmp3, half *%aa, align 2
176  ret void
177}
178
; test_fma_const_fold: llvm.fma.f16 called with the constant 1.0 as one
; multiplicand, i.e. fma(a, 1.0, b), with the result stored through %aa.
; Both run configurations expect a plain vadd.f16 and no fused instruction.
179define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
180; CHECK-LABEL: test_fma_const_fold:
181; CHECK:       @ %bb.0:
182; CHECK-NEXT:    vldr.16 s0, [r1]
183; CHECK-NEXT:    vldr.16 s2, [r0]
184; CHECK-NEXT:    vadd.f16 s0, s2, s0
185; CHECK-NEXT:    vstr.16 s0, [r0]
186; CHECK-NEXT:    bx lr
187;
188; DONT-FUSE-LABEL: test_fma_const_fold:
189; DONT-FUSE:       @ %bb.0:
190; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
191; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
192; DONT-FUSE-NEXT:    vadd.f16 s0, s2, s0
193; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
194; DONT-FUSE-NEXT:    bx lr
195
196  %a = load half, half *%aa, align 2
197  %b = load half, half *%bb, align 2
  ; The 1.0 multiplicand is what allows the fma to reduce to an add.
198  %ret = call half @llvm.fma.f16(half %a, half 1.0, half %b)
199  store half %ret, half *%aa, align 2
200  ret void
201}
202
; test_fma_canonicalize: llvm.fma.f16 called with the constant 2.0 as its
; FIRST operand, i.e. fma(2.0, a, b), with the result stored through %aa.
; Both run configurations expect the constant to be materialised with
; vmov.f16 and to appear as the LAST operand of vfma.f16, with the loaded
; value a in the middle operand.
; NOTE(review): presumably this exercises operand canonicalisation of a
; constant multiplicand, as the function name suggests - confirm.
203define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
204; CHECK-LABEL: test_fma_canonicalize:
205; CHECK:       @ %bb.0:
206; CHECK-NEXT:    vldr.16 s0, [r0]
207; CHECK-NEXT:    vldr.16 s2, [r1]
208; CHECK-NEXT:    vmov.f16 s4, #2.000000e+00
209; CHECK-NEXT:    vfma.f16 s2, s0, s4
210; CHECK-NEXT:    vstr.16 s2, [r0]
211; CHECK-NEXT:    bx lr
212;
213; DONT-FUSE-LABEL: test_fma_canonicalize:
214; DONT-FUSE:       @ %bb.0:
215; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
216; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
217; DONT-FUSE-NEXT:    vmov.f16 s4, #2.000000e+00
218; DONT-FUSE-NEXT:    vfma.f16 s2, s0, s4
219; DONT-FUSE-NEXT:    vstr.16 s2, [r0]
220; DONT-FUSE-NEXT:    bx lr
221
222  %a = load half, half *%aa, align 2
223  %b = load half, half *%bb, align 2
224  %ret = call half @llvm.fma.f16(half 2.0, half %a, half %b)
225  store half %ret, half *%aa, align 2
226  ret void
227}
228
; fms1: llvm.fma.f16 with its first multiplicand negated, i.e.
; fma(-f1, f2, f3), with the result stored back through %a1.
; Both run configurations expect vfms.f16.
229define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
230; CHECK-LABEL: fms1:
231; CHECK:       @ %bb.0:
232; CHECK-NEXT:    vldr.16 s0, [r1]
233; CHECK-NEXT:    vldr.16 s2, [r0]
234; CHECK-NEXT:    vldr.16 s4, [r2]
235; CHECK-NEXT:    vfms.f16 s4, s2, s0
236; CHECK-NEXT:    vstr.16 s4, [r0]
237; CHECK-NEXT:    bx lr
238;
239; DONT-FUSE-LABEL: fms1:
240; DONT-FUSE:       @ %bb.0:
241; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
242; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
243; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
244; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
245; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
246; DONT-FUSE-NEXT:    bx lr
247
248  %f1 = load half, half *%a1, align 2
249  %f2 = load half, half *%a2, align 2
250  %f3 = load half, half *%a3, align 2
  ; Negation is written as a subtraction from -0.0.
251  %s = fsub half -0.0, %f1
252  %ret = call half @llvm.fma.f16(half %s, half %f2, half %f3)
253  store half %ret, half *%a1, align 2
254  ret void
255}
256
; fms2: llvm.fma.f16 with its second multiplicand negated, i.e.
; fma(f2, -f1, f3), with the result stored back through %a1.
; Both run configurations expect vfms.f16, the same instruction as fms1, which
; negates the first multiplicand instead.
257define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
258; CHECK-LABEL: fms2:
259; CHECK:       @ %bb.0:
260; CHECK-NEXT:    vldr.16 s0, [r1]
261; CHECK-NEXT:    vldr.16 s2, [r0]
262; CHECK-NEXT:    vldr.16 s4, [r2]
263; CHECK-NEXT:    vfms.f16 s4, s2, s0
264; CHECK-NEXT:    vstr.16 s4, [r0]
265; CHECK-NEXT:    bx lr
266;
267; DONT-FUSE-LABEL: fms2:
268; DONT-FUSE:       @ %bb.0:
269; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
270; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
271; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
272; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
273; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
274; DONT-FUSE-NEXT:    bx lr
275
276  %f1 = load half, half *%a1, align 2
277  %f2 = load half, half *%a2, align 2
278  %f3 = load half, half *%a3, align 2
  ; Negation is written as a subtraction from -0.0.
279  %s = fsub half -0.0, %f1
280  %ret = call half @llvm.fma.f16(half %f2, half %s, half %f3)
281  store half %ret, half *%a1, align 2
282  ret void
283}
284
; fnma1: negation of an llvm.fma.f16 result, i.e. -fma(f1, f2, f3), with the
; negated value stored back through %a1.
; Both run configurations expect vfnma.f16.
285define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
286; CHECK-LABEL: fnma1:
287; CHECK:       @ %bb.0:
288; CHECK-NEXT:    vldr.16 s0, [r1]
289; CHECK-NEXT:    vldr.16 s2, [r0]
290; CHECK-NEXT:    vldr.16 s4, [r2]
291; CHECK-NEXT:    vfnma.f16 s4, s2, s0
292; CHECK-NEXT:    vstr.16 s4, [r0]
293; CHECK-NEXT:    bx lr
294;
295; DONT-FUSE-LABEL: fnma1:
296; DONT-FUSE:       @ %bb.0:
297; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
298; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
299; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
300; DONT-FUSE-NEXT:    vfnma.f16 s4, s2, s0
301; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
302; DONT-FUSE-NEXT:    bx lr
303
304  %f1 = load half, half *%a1, align 2
305  %f2 = load half, half *%a2, align 2
306  %f3 = load half, half *%a3, align 2
307  %fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3)
  ; The whole fma result is negated (subtraction from -0.0).
308  %n1 = fsub half -0.0, %fma
309  store half %n1, half *%a1, align 2
310  ret void
311}
312
; fnma2: llvm.fma.f16 with both a multiplicand and the addend negated, i.e.
; fma(-f1, f2, -f3), with the result stored back through %a1.
; Both run configurations expect vfnma.f16, the same instruction as fnma1,
; which negates the fma result instead.
313define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
314; CHECK-LABEL: fnma2:
315; CHECK:       @ %bb.0:
316; CHECK-NEXT:    vldr.16 s0, [r1]
317; CHECK-NEXT:    vldr.16 s2, [r0]
318; CHECK-NEXT:    vldr.16 s4, [r2]
319; CHECK-NEXT:    vfnma.f16 s4, s2, s0
320; CHECK-NEXT:    vstr.16 s4, [r0]
321; CHECK-NEXT:    bx lr
322;
323; DONT-FUSE-LABEL: fnma2:
324; DONT-FUSE:       @ %bb.0:
325; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
326; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
327; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
328; DONT-FUSE-NEXT:    vfnma.f16 s4, s2, s0
329; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
330; DONT-FUSE-NEXT:    bx lr
331
332  %f1 = load half, half *%a1, align 2
333  %f2 = load half, half *%a2, align 2
334  %f3 = load half, half *%a3, align 2
  ; Negations are written as subtractions from -0.0.
335  %n1 = fsub half -0.0, %f1
336  %n3 = fsub half -0.0, %f3
337  %ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3)
338  store half %ret, half *%a1, align 2
339  ret void
340}
341
; fnms1: llvm.fma.f16 with its addend negated, i.e. fma(f1, f2, -f3), with the
; result stored back through %a1.
; Both run configurations expect vfnms.f16.
342define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
343; CHECK-LABEL: fnms1:
344; CHECK:       @ %bb.0:
345; CHECK-NEXT:    vldr.16 s0, [r1]
346; CHECK-NEXT:    vldr.16 s2, [r0]
347; CHECK-NEXT:    vldr.16 s4, [r2]
348; CHECK-NEXT:    vfnms.f16 s4, s2, s0
349; CHECK-NEXT:    vstr.16 s4, [r0]
350; CHECK-NEXT:    bx lr
351;
352; DONT-FUSE-LABEL: fnms1:
353; DONT-FUSE:       @ %bb.0:
354; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
355; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
356; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
357; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
358; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
359; DONT-FUSE-NEXT:    bx lr
360
361  %f1 = load half, half *%a1, align 2
362  %f2 = load half, half *%a2, align 2
363  %f3 = load half, half *%a3, align 2
  ; Negation is written as a subtraction from -0.0.
364  %n3 = fsub half -0.0, %f3
365  %ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3)
366  store half %ret, half *%a1, align 2
367  ret void
368}
369
; fnms2: negation of an llvm.fma.f16 whose first multiplicand is itself
; negated, i.e. -fma(-f1, f2, f3), with the result stored through %a1.
; Both run configurations expect vfnms.f16, the same instruction as fnms1.
370define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
371; CHECK-LABEL: fnms2:
372; CHECK:       @ %bb.0:
373; CHECK-NEXT:    vldr.16 s0, [r1]
374; CHECK-NEXT:    vldr.16 s2, [r0]
375; CHECK-NEXT:    vldr.16 s4, [r2]
376; CHECK-NEXT:    vfnms.f16 s4, s2, s0
377; CHECK-NEXT:    vstr.16 s4, [r0]
378; CHECK-NEXT:    bx lr
379;
380; DONT-FUSE-LABEL: fnms2:
381; DONT-FUSE:       @ %bb.0:
382; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
383; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
384; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
385; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
386; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
387; DONT-FUSE-NEXT:    bx lr
388
389  %f1 = load half, half *%a1, align 2
390  %f2 = load half, half *%a2, align 2
391  %f3 = load half, half *%a3, align 2
  ; Two negations (each a subtraction from -0.0): one on the first
  ; multiplicand, one on the fma result.
392  %n1 = fsub half -0.0, %f1
393  %fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3)
394  %n = fsub half -0.0, %fma
395  store half %n, half *%a1, align 2
396  ret void
397}
398
; fnms3: negation of an llvm.fma.f16 whose second multiplicand is itself
; negated, i.e. -fma(f1, -f2, f3), with the result stored through %a1.
; Both run configurations expect vfnms.f16. Unlike fnms1 and fnms2, the
; expected output loads s0 from [r0] and s2 from [r1].
399define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
400; CHECK-LABEL: fnms3:
401; CHECK:       @ %bb.0:
402; CHECK-NEXT:    vldr.16 s0, [r0]
403; CHECK-NEXT:    vldr.16 s2, [r1]
404; CHECK-NEXT:    vldr.16 s4, [r2]
405; CHECK-NEXT:    vfnms.f16 s4, s2, s0
406; CHECK-NEXT:    vstr.16 s4, [r0]
407; CHECK-NEXT:    bx lr
408;
409; DONT-FUSE-LABEL: fnms3:
410; DONT-FUSE:       @ %bb.0:
411; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
412; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
413; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
414; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
415; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
416; DONT-FUSE-NEXT:    bx lr
417
418  %f1 = load half, half *%a1, align 2
419  %f2 = load half, half *%a2, align 2
420  %f3 = load half, half *%a3, align 2
  ; Two negations (each a subtraction from -0.0): one on the second
  ; multiplicand, one on the fma result.
421  %n2 = fsub half -0.0, %f2
422  %fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3)
423  %n1 = fsub half -0.0, %fma
424  store half %n1, half *%a1, align 2
425  ret void
426}
427
428
; Declaration of the half-precision fma intrinsic called by the explicit-fma
; tests in this file.
429declare half @llvm.fma.f16(half, half, half) nounwind readnone
430