; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mcpu=core-avx2 -show-mc-encoding | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -disable-peephole -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=AVX512

target triple = "x86_64-unknown-unknown"

declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

; a = a*a + b: the load of %b folds into the memory operand of the 213 form.
define void @fmadd_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fmadd_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmadd213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xa9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmadd213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
53
; a = a*b + a: the load of %b folds into the memory operand of the 231 form.
define void @fmadd_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fmadd_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
90
; a = a*a - b: the load of %b folds into the memory operand of the 213 form.
define void @fmsub_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fmsub_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmsub213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xab,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmsub213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
127
; a = a*b - a: the load of %b folds into the memory operand of the 231 form.
define void @fmsub_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fmsub_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmsub231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbb,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmsub231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
164
; a = -(a*a) + b: the load of %b folds into the memory operand of the 213 form.
define void @fnmadd_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmadd_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmadd213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xad,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmadd213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
201
; a = -(a*b) + a: the load of %b folds into the memory operand of the 231 form.
define void @fnmadd_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmadd_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmadd231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbd,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmadd231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbd,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
238
; a = -(a*a) - b: the load of %b folds into the memory operand of the 213 form.
define void @fnmsub_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmsub_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmsub213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xaf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmsub213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
275
; a = -(a*b) - a: the load of %b folds into the memory operand of the 231 form.
define void @fnmsub_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmsub_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmsub231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmsub231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}
312
; a = a*a + b (double): the load of %b folds into the memory operand of the 213 form.
define void @fmadd_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fmadd_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmadd213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xa9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmadd213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
345
; a = a*b + a (double): the load of %b folds into the memory operand of the 231 form.
define void @fmadd_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fmadd_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
378
; a = a*a - b (double): the load of %b folds into the memory operand of the 213 form.
define void @fmsub_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fmsub_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmsub213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xab,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmsub213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
411
; a = a*b - a (double): the load of %b folds into the memory operand of the 231 form.
define void @fmsub_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fmsub_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmsub231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbb,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmsub231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
444
; a = -(a*a) + b (double): the load of %b folds into the memory operand of the 213 form.
define void @fnmadd_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmadd_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmadd213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xad,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmadd213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
477
; a = -(a*b) + a (double): the load of %b folds into the memory operand of the 231 form.
define void @fnmadd_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmadd_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmadd231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbd,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmadd231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbd,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
510
; a = -(a*a) - b (double): the load of %b folds into the memory operand of the 213 form.
define void @fnmsub_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmsub_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmsub213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xaf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmsub213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}
543
; a = -(a*b) - a (double): the load of %b folds into the memory operand of the 231 form.
define void @fnmsub_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmsub_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmsub231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmsub231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}