; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK

; VFMADD: fused multiply-add, (a0 * a1) + a2
; Calls @llvm.x86.fma.vfmadd.ps on <4 x float>; both llc invocations in this
; file must emit one four-operand vfmaddps on xmm registers, then retq.
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

; Calls @llvm.x86.fma.vfmadd.pd on <2 x double>; both llc invocations in this
; file must emit one four-operand vfmaddpd on xmm registers, then retq.
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)

; Calls @llvm.x86.fma.vfmadd.ps.256 on <8 x float>; both llc invocations in
; this file must emit one four-operand vfmaddps on ymm registers, then retq.
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Calls @llvm.x86.fma.vfmadd.pd.256 on <4 x double>; both llc invocations in
; this file must emit one four-operand vfmaddpd on ymm registers, then retq.
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUB: fused multiply-subtract, (a0 * a1) - a2
; Calls @llvm.x86.fma.vfmsub.ps on <4 x float>; both llc invocations in this
; file must emit one four-operand vfmsubps on xmm registers, then retq.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)

; Calls @llvm.x86.fma.vfmsub.pd on <2 x double>; both llc invocations in this
; file must emit one four-operand vfmsubpd on xmm registers, then retq.
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)

; Calls @llvm.x86.fma.vfmsub.ps.256 on <8 x float>; both llc invocations in
; this file must emit one four-operand vfmsubps on ymm registers, then retq.
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Calls @llvm.x86.fma.vfmsub.pd.256 on <4 x double>; both llc invocations in
; this file must emit one four-operand vfmsubpd on ymm registers, then retq.
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMADD: negated fused multiply-add, -(a0 * a1) + a2
; Calls @llvm.x86.fma.vfnmadd.ps on <4 x float>; both llc invocations in this
; file must emit one four-operand vfnmaddps on xmm registers, then retq.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)

; Calls @llvm.x86.fma.vfnmadd.pd on <2 x double>; both llc invocations in this
; file must emit one four-operand vfnmaddpd on xmm registers, then retq.
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)

; Calls @llvm.x86.fma.vfnmadd.ps.256 on <8 x float>; both llc invocations in
; this file must emit one four-operand vfnmaddps on ymm registers, then retq.
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Calls @llvm.x86.fma.vfnmadd.pd.256 on <4 x double>; both llc invocations in
; this file must emit one four-operand vfnmaddpd on ymm registers, then retq.
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMSUB: negated fused multiply-subtract, -(a0 * a1) - a2
; Calls @llvm.x86.fma.vfnmsub.ps on <4 x float>; both llc invocations in this
; file must emit one four-operand vfnmsubps on xmm registers, then retq.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)

; Calls @llvm.x86.fma.vfnmsub.pd on <2 x double>; both llc invocations in this
; file must emit one four-operand vfnmsubpd on xmm registers, then retq.
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)

; Calls @llvm.x86.fma.vfnmsub.ps.256 on <8 x float>; both llc invocations in
; this file must emit one four-operand vfnmsubps on ymm registers, then retq.
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Calls @llvm.x86.fma.vfnmsub.pd.256 on <4 x double>; both llc invocations in
; this file must emit one four-operand vfnmsubpd on ymm registers, then retq.
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMADDSUB: fused multiply with alternating add/subtract, (a0 * a1) +/- a2
; Calls @llvm.x86.fma.vfmaddsub.ps on <4 x float>; both llc invocations in this
; file must emit one four-operand vfmaddsubps on xmm registers, then retq.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)

; Calls @llvm.x86.fma.vfmaddsub.pd on <2 x double>; both llc invocations in this
; file must emit one four-operand vfmaddsubpd on xmm registers, then retq.
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)

; Calls @llvm.x86.fma.vfmaddsub.ps.256 on <8 x float>; both llc invocations in
; this file must emit one four-operand vfmaddsubps on ymm registers, then retq.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Calls @llvm.x86.fma.vfmaddsub.pd.256 on <4 x double>; both llc invocations in
; this file must emit one four-operand vfmaddsubpd on ymm registers, then retq.
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUBADD: fused multiply with alternating subtract/add, (a0 * a1) -/+ a2
; Calls @llvm.x86.fma.vfmsubadd.ps on <4 x float>; both llc invocations in this
; file must emit one four-operand vfmsubaddps on xmm registers, then retq.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)

; Calls @llvm.x86.fma.vfmsubadd.pd on <2 x double>; both llc invocations in this
; file must emit one four-operand vfmsubaddpd on xmm registers, then retq.
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)

; Calls @llvm.x86.fma.vfmsubadd.ps.256 on <8 x float>; both llc invocations in
; this file must emit one four-operand vfmsubaddps on ymm registers, then retq.
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

; Calls @llvm.x86.fma.vfmsubadd.pd.256 on <4 x double>; both llc invocations in
; this file must emit one four-operand vfmsubaddpd on ymm registers, then retq.
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; Attribute group referenced as #0 by the test functions above.
attributes #0 = { nounwind }