; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; Two-input v2f64 horizontal add: %a takes the even elements and %b the odd
; elements of the concatenation (x, y), so %r = <x0+x1, y0+y1>.
; The checks expect this to be selected as a single haddpd / vhaddpd.
define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: haddpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; Commuted variant of the v2f64 horizontal add: %a = <x1, y0> and, with the
; shuffle operands swapped to (y, x), %b = <x0, y1>, so %r = <x1+x0, y0+y1>.
; The checks expect the same single haddpd / vhaddpd as the canonical form.
define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: haddpd2:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
  %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; Single-input v2f64 horizontal add with a partially undef mask: only
; element 0 of the result (x0+x1) is defined.
; The checks expect haddpd / vhaddpd with the same register for both sources.
define <2 x double> @haddpd3(<2 x double> %x) {
; SSE3-LABEL: haddpd3:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddpd3:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; Two-input v4f32 horizontal add: %a takes the even elements and %b the odd
; elements of the concatenation (x, y), so
; %r = <x0+x1, x2+x3, y0+y1, y2+y3>.
; The checks expect a single haddps / vhaddps.
define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: haddps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Commuted variant of the v4f32 horizontal add: %a = <x1, x2, y1, y2> and,
; with the shuffle operands swapped to (y, x), %b = <x0, x3, y0, y3>.
; Each result element is still the sum of one adjacent pair, so the checks
; expect the same single haddps / vhaddps.
define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: haddps2:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal add where only element 1 (x2+x3) is defined:
; element 0 uses an undef mask index and elements 2-3 select from the undef
; second shuffle operand.
; The checks expect haddps / vhaddps with the same register for both sources.
define <4 x float> @haddps3(<4 x float> %x) {
; SSE3-LABEL: haddps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal add with the upper half of the result undef:
; %r = <x0+x1, x2+x3, undef, undef>.
; The checks expect haddps / vhaddps with the same register for both sources.
define <4 x float> @haddps4(<4 x float> %x) {
; SSE3-LABEL: haddps4:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps4:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Like haddps4, but the addends of element 1 are commuted:
; %r = <x0+x1, x3+x2, undef, undef>.
; The checks expect haddps / vhaddps with the same register for both sources.
define <4 x float> @haddps5(<4 x float> %x) {
; SSE3-LABEL: haddps5:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps5:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal add where only element 0 (x0+x1) is defined;
; every other mask index is undef.
; The checks expect haddps / vhaddps with the same register for both sources.
define <4 x float> @haddps6(<4 x float> %x) {
; SSE3-LABEL: haddps6:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps6:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal add where only element 1 is defined, with
; commuted addends (x3+x2); every other mask index is undef.
; The checks expect haddps / vhaddps with the same register for both sources.
define <4 x float> @haddps7(<4 x float> %x) {
; SSE3-LABEL: haddps7:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps7:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; Two-input v2f64 horizontal subtract: even elements of (x, y) minus the odd
; elements, so %r = <x0-x1, y0-y1>.
; The checks expect a single hsubpd / vhsubpd.
define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
; SSE3-LABEL: hsubpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubpd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}

; Single-input v2f64 horizontal subtract with a partially undef mask: only
; element 0 of the result (x0-x1) is defined.
; The checks expect hsubpd / vhsubpd with the same register for both sources.
define <2 x double> @hsubpd2(<2 x double> %x) {
; SSE3-LABEL: hsubpd2:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubpd %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubpd2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}

; Two-input v4f32 horizontal subtract: even elements of (x, y) minus the odd
; elements, so %r = <x0-x1, x2-x3, y0-y1, y2-y3>.
; The checks expect a single hsubps / vhsubps.
define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
; SSE3-LABEL: hsubps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal subtract where only element 1 (x2-x3) is
; defined: element 0 uses an undef mask index and elements 2-3 select from
; the undef second shuffle operand.
; The checks expect hsubps / vhsubps with the same register for both sources.
define <4 x float> @hsubps2(<4 x float> %x) {
; SSE3-LABEL: hsubps2:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal subtract with the upper half of the result
; undef: %r = <x0-x1, x2-x3, undef, undef>.
; The checks expect hsubps / vhsubps with the same register for both sources.
define <4 x float> @hsubps3(<4 x float> %x) {
; SSE3-LABEL: hsubps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Single-input v4f32 horizontal subtract where only element 0 (x0-x1) is
; defined; every other mask index is undef.
; The checks expect hsubps / vhsubps with the same register for both sources.
define <4 x float> @hsubps4(<4 x float> %x) {
; SSE3-LABEL: hsubps4:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: hsubps4:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; Two-input v8f32 horizontal add laid out per 128-bit half:
; %r = <x0+x1, x2+x3, y0+y1, y2+y3, x4+x5, x6+x7, y4+y5, y6+y7>.
; The SSE3 checks expect two haddps on xmm registers; the AVX checks expect
; one vhaddps on ymm registers.
define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhaddps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm2, %xmm0
; SSE3-NEXT:    haddps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}

; Commuted variant of the v8f32 horizontal add:
; %a = <x1, x2, y1, y2, x5, x6, y5, y6> and, with the shuffle operands
; swapped to (y, x), %b = <x0, x3, y0, y3, x4, x7, y4, y7>.
; Each result element is still the sum of one adjacent pair, so the checks
; expect the same instructions as for vhaddps1.
define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhaddps2:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm2, %xmm0
; SSE3-NEXT:    haddps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
  %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}

; Single-input v8f32 horizontal add with undef mask indices and an undef
; second shuffle operand. Only elements 1 (x2+x3), 4 (x4+x5) and 5 (x6+x7)
; have both addends defined.
; The SSE3 checks expect one haddps per 128-bit half; the AVX checks expect
; one vhaddps on ymm registers, each with the same register for both sources.
define <8 x float> @vhaddps3(<8 x float> %x) {
; SSE3-LABEL: vhaddps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    haddps %xmm1, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}

; Two-input v8f32 horizontal subtract laid out per 128-bit half:
; %r = <x0-x1, x2-x3, y0-y1, y2-y3, x4-x5, x6-x7, y4-y5, y6-y7>.
; The SSE3 checks expect two hsubps on xmm registers; the AVX checks expect
; one vhsubps on ymm registers.
define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
; SSE3-LABEL: vhsubps1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm2, %xmm0
; SSE3-NEXT:    hsubps %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubps1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
  %r = fsub <8 x float> %a, %b
  ret <8 x float> %r
}

; Single-input v8f32 horizontal subtract with undef mask indices and an undef
; second shuffle operand. Only elements 1 (x2-x3), 4 (x4-x5) and 5 (x6-x7)
; have both operands defined.
; The SSE3 checks expect one hsubps per 128-bit half; the AVX checks expect
; one vhsubps on ymm registers, each with the same register for both sources.
define <8 x float> @vhsubps3(<8 x float> %x) {
; SSE3-LABEL: vhsubps3:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubps %xmm0, %xmm0
; SSE3-NEXT:    hsubps %xmm1, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubps3:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
  %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
  %r = fsub <8 x float> %a, %b
  ret <8 x float> %r
}

; Two-input v4f64 horizontal add laid out per 128-bit half:
; %r = <x0+x1, y0+y1, x2+x3, y2+y3>.
; The SSE3 checks expect two haddpd on xmm registers; the AVX checks expect
; one vhaddpd on ymm registers.
define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
; SSE3-LABEL: vhaddpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddpd %xmm2, %xmm0
; SSE3-NEXT:    haddpd %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhaddpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %r = fadd <4 x double> %a, %b
  ret <4 x double> %r
}

; Two-input v4f64 horizontal subtract laid out per 128-bit half:
; %r = <x0-x1, y0-y1, x2-x3, y2-y3>.
; The SSE3 checks expect two hsubpd on xmm registers; the AVX checks expect
; one vhsubpd on ymm registers.
define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
; SSE3-LABEL: vhsubpd1:
; SSE3:       # BB#0:
; SSE3-NEXT:    hsubpd %xmm2, %xmm0
; SSE3-NEXT:    hsubpd %xmm3, %xmm1
; SSE3-NEXT:    retq
;
; AVX-LABEL: vhsubpd1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %r = fsub <4 x double> %a, %b
  ret <4 x double> %r
}

; Horizontal add written with scalar operations: the four elements of %v0 are
; extracted, added in adjacent pairs, and the two sums are inserted into a
; <2 x float> result, giving <v0[0]+v0[1], v0[2]+v0[3]>.
; The checks expect this to be selected as a single haddps / vhaddps with the
; same register for both sources.
define <2 x float> @haddps_v2f32(<4 x float> %v0) {
; SSE3-LABEL: haddps_v2f32:
; SSE3:       # BB#0:
; SSE3-NEXT:    haddps %xmm0, %xmm0
; SSE3-NEXT:    retq
;
; AVX-LABEL: haddps_v2f32:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %v0.0 = extractelement <4 x float> %v0, i32 0
  %v0.1 = extractelement <4 x float> %v0, i32 1
  %v0.2 = extractelement <4 x float> %v0, i32 2
  %v0.3 = extractelement <4 x float> %v0, i32 3
  %op0 = fadd float %v0.0, %v0.1
  %op1 = fadd float %v0.2, %v0.3
  %res0 = insertelement <2 x float> undef, float %op0, i32 0
  %res1 = insertelement <2 x float> %res0, float %op1, i32 1
  ret <2 x float> %res1
}
