; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512

; Test ADDSUB ISel patterns.

; Functions test1 through test4 below were obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

; test1 - <4 x float>, both operands in registers.
; The shuffle keeps lanes 0 and 2 of (A - B) and lanes 1 and 3 of (A + B).
; The checks below expect this to be selected as a single (v)addsubps.
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

; test2 - <8 x float>, both operands in registers.
; Even result lanes come from (A - B), odd result lanes from (A + B).
; With only SSE3 the checks expect one addsubps per 128-bit half; with AVX
; they expect a single 256-bit vaddsubps.
define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

; test3 - <4 x double>, both operands in registers.
; The shuffle keeps lanes 0 and 2 of (A - B) and lanes 1 and 3 of (A + B).
; With only SSE3 the checks expect one addsubpd per 128-bit half; with AVX
; they expect a single 256-bit vaddsubpd.
define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

; test4 - <2 x double>, both operands in registers.
; Here the fadd is emitted before the fsub. The shuffle keeps lane 0 of
; (A - B) and lane 1 of (A + B); the checks expect a single (v)addsubpd.
; NOTE(review): the function references attribute group #0, whose definition
; is not visible in this part of the file - confirm it exists or drop the
; reference.
define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
; SSE-LABEL: test4:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

; test5 - <16 x float>, both operands in registers.
; Even result lanes come from (A - B), odd result lanes from (A + B).
; The SSE3 checks expect four 128-bit addsubps and the AVX checks two 256-bit
; vaddsubps. The avx512f checks record a separate vaddps/vsubps on zmm
; registers followed by a vshufps + vpermilps pair that interleaves the lanes.
define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
; SSE-LABEL: test5:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm4, %xmm0
; SSE-NEXT:    addsubps %xmm5, %xmm1
; SSE-NEXT:    addsubps %xmm6, %xmm2
; SSE-NEXT:    addsubps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test5:
; AVX1:       # BB#0:
; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test5:
; AVX512:       # BB#0:
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,2],zmm2[1,3],zmm0[4,6],zmm2[5,7],zmm0[8,10],zmm2[9,11],zmm0[12,14],zmm2[13,15]
; AVX512-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15]
; AVX512-NEXT:    retq
  %add = fadd <16 x float> %A, %B
  %sub = fsub <16 x float> %A, %B
  %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %vecinit2
}

; test6 - <8 x double>, both operands in registers.
; Even result lanes come from (A - B), odd result lanes from (A + B).
; The SSE3 checks expect four 128-bit addsubpd and the AVX checks two 256-bit
; vaddsubpd. The avx512f checks record a separate vaddpd/vsubpd on zmm
; registers blended by one vshufpd.
define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
; SSE-LABEL: test6:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm4, %xmm0
; SSE-NEXT:    addsubpd %xmm5, %xmm1
; SSE-NEXT:    addsubpd %xmm6, %xmm2
; SSE-NEXT:    addsubpd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: test6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: test6:
; AVX512:       # BB#0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
; AVX512-NEXT:    retq
  %add = fadd <8 x double> %A, %B
  %sub = fsub <8 x double> %A, %B
  %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %vecinit2
}

; test1b - <4 x float>, second operand loaded from memory.
; Same lane selection as test1 (lanes 0 and 2 from A - B, lanes 1 and 3 from
; A + B). The checks expect the load to be folded into the (v)addsubps memory
; operand.
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

; test2b - <8 x float>, second operand loaded from memory.
; Even result lanes come from (A - B), odd result lanes from (A + B).
; The SSE3 checks expect two addsubps with folded loads at offsets 0 and 16;
; the AVX checks expect one 256-bit vaddsubps with a folded load.
define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

; test3b - <4 x double>, second operand loaded from memory.
; The shuffle keeps lanes 0 and 2 of (A - B) and lanes 1 and 3 of (A + B).
; The SSE3 checks expect two addsubpd with folded loads at offsets 0 and 16;
; the AVX checks expect one 256-bit vaddsubpd with a folded load.
define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

; test4b - <2 x double>, second operand loaded from memory.
; The shuffle keeps lane 0 of (A - B) and lane 1 of (A + B). The checks expect
; a single (v)addsubpd with the load folded into its memory operand.
define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

; test1c - <4 x float>, second operand loaded from memory, shuffle commuted.
; The shufflevector takes %add as its first operand and %sub as its second,
; with mask <4,1,6,3>. That still selects lanes 0 and 2 from (A - B) and lanes
; 1 and 3 from (A + B), so the checks expect the same folded (v)addsubps.
define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %vecinit6
}

; test2c - <8 x float>, second operand loaded from memory, shuffle commuted.
; The shufflevector takes %add first and %sub second; its mask picks even
; result lanes from %sub and odd result lanes from %add. The checks expect the
; same folded-load (v)addsubps sequences as for the non-commuted form.
define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %vecinit14
}

; test3c - <4 x double>, second operand loaded from memory, shuffle commuted.
; The shufflevector takes %add first and %sub second, with mask <4,1,6,3>.
; That still selects lanes 0 and 2 from (A - B) and lanes 1 and 3 from (A + B),
; so the checks expect the same folded-load (v)addsubpd sequences.
define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %vecinit6
}

; test4c - <2 x double>, second operand loaded from memory, shuffle commuted.
; The shufflevector takes %add first and %sub second, with mask <2,1>. That
; still selects lane 0 from (A - B) and lane 1 from (A + B), so the checks
; expect a single (v)addsubpd with the load folded into its memory operand.
define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %vecinit2
}
