; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA

; Attribute group referenced as #0 by the test functions in this file.
attributes #0 = { nounwind }
7
; Intrinsic exercised by the test_x86_fmadd_*_ss functions.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
19
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
29
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
39
; Intrinsic exercised by the test_x86_fmadd_*_ps functions.
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
50
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
60
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
70
; Intrinsic exercised by the test_x86_fmadd_*_ps_y functions.
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}
81
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
91
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
101
; Intrinsic exercised by the test_x86_fmadd_*_sd functions.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
113
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
123
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
133
; Intrinsic exercised by the test_x86_fmadd_*_pd functions.
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
144
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
154
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
164
; Intrinsic exercised by the test_x86_fmadd_*_pd_y functions.
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}
175
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
185
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
195
196
; Intrinsic exercised by the test_x86_fnmadd_*_ss functions.
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
208
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
218
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
228
; Intrinsic exercised by the test_x86_fnmadd_*_ps functions.
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
239
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
249
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
259
; Intrinsic exercised by the test_x86_fnmadd_*_ps_y functions.
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}
270
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
280
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
290
; Intrinsic exercised by the test_x86_fnmadd_*_sd functions.
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
302
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
312
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
322
; Intrinsic exercised by the test_x86_fnmadd_*_pd functions.
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
333
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
343
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
353
; Intrinsic exercised by the test_x86_fnmadd_*_pd_y functions.
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}
364
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
374
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
384
; Intrinsic exercised by the test_x86_fmsub_*_ss functions.
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
396
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
406
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
416
; Intrinsic exercised by the test_x86_fmsub_*_ps functions.
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
427
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
437
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
447
; Intrinsic exercised by the test_x86_fmsub_*_ps_y functions.
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}
458
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
468
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
478
; Intrinsic exercised by the test_x86_fmsub_*_sd functions.
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
490
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
500
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
510
; Intrinsic exercised by the test_x86_fmsub_*_pd functions.
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
521
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
531
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
541
; Intrinsic exercised by the test_x86_fmsub_*_pd_y functions.
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}
552
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
562
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
572
573
; Intrinsic exercised by the test_x86_fnmsub_*_ss functions.
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
585
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
595
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
605
; Intrinsic exercised by the test_x86_fnmsub_*_ps functions.
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}
616
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
626
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}
636
; Intrinsic exercised by the test_x86_fnmsub_*_ps_y functions.
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}
647
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
657
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}
667
; Intrinsic exercised by the test_x86_fnmsub_*_sd functions.
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
679
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
689
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
699
; Intrinsic exercised by the test_x86_fnmsub_*_pd functions.
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}
710
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
720
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}
730
; Intrinsic exercised by the test_x86_fnmsub_*_pd_y functions.
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order "baa": the intrinsic is called with (%b, %a, %a).
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}
741
; Operand order "aba": the intrinsic is called with (%a, %b, %a).
define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
751
; Operand order "bba": the intrinsic is called with (%b, %b, %a).
define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
761
762