1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
5; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
6; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
7; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
8
9;
10; 128-bit vectors
11;
12
; Pairwise adjacent-element add on <2 x double>, written as scalar
; extractelement / fadd / insertelement chains:
;   result[0] = a[0] + a[1]
;   result[1] = b[0] + b[1]
; The SSE and AVX prefixes expect the scalars to be replaced by one even-lane
; shuffle of (a, b), one odd-lane shuffle of (a, b) and a single <2 x double>
; fadd. The SLM prefix expects the scalar code to be left unvectorized.
13define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) {
14; SSE-LABEL: @test_v2f64(
15; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
16; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
17; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
18; SSE-NEXT:    ret <2 x double> [[TMP3]]
19;
20; SLM-LABEL: @test_v2f64(
21; SLM-NEXT:    [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
22; SLM-NEXT:    [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
23; SLM-NEXT:    [[B0:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
24; SLM-NEXT:    [[B1:%.*]] = extractelement <2 x double> [[B]], i32 1
25; SLM-NEXT:    [[R0:%.*]] = fadd double [[A0]], [[A1]]
26; SLM-NEXT:    [[R1:%.*]] = fadd double [[B0]], [[B1]]
27; SLM-NEXT:    [[R00:%.*]] = insertelement <2 x double> undef, double [[R0]], i32 0
28; SLM-NEXT:    [[R01:%.*]] = insertelement <2 x double> [[R00]], double [[R1]], i32 1
29; SLM-NEXT:    ret <2 x double> [[R01]]
30;
31; AVX-LABEL: @test_v2f64(
32; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
33; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
34; AVX-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
35; AVX-NEXT:    ret <2 x double> [[TMP3]]
36;
37  %a0 = extractelement <2 x double> %a, i32 0
38  %a1 = extractelement <2 x double> %a, i32 1
39  %b0 = extractelement <2 x double> %b, i32 0
40  %b1 = extractelement <2 x double> %b, i32 1
41  %r0 = fadd double %a0, %a1
42  %r1 = fadd double %b0, %b1
43  %r00 = insertelement <2 x double> undef, double %r0, i32 0
44  %r01 = insertelement <2 x double>  %r00, double %r1, i32 1
45  ret <2 x double> %r01
46}
47
; Pairwise adjacent-element add on <4 x float>, written as scalar
; extractelement / fadd / insertelement chains:
;   result = < a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] >
; Every RUN configuration shares the common CHECK prefix here and expects an
; even-lane shuffle of (a, b), an odd-lane shuffle of (a, b) and a single
; <4 x float> fadd.
48define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) {
49; CHECK-LABEL: @test_v4f32(
50; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
51; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
52; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
53; CHECK-NEXT:    ret <4 x float> [[TMP3]]
54;
55  %a0 = extractelement <4 x float> %a, i32 0
56  %a1 = extractelement <4 x float> %a, i32 1
57  %a2 = extractelement <4 x float> %a, i32 2
58  %a3 = extractelement <4 x float> %a, i32 3
59  %b0 = extractelement <4 x float> %b, i32 0
60  %b1 = extractelement <4 x float> %b, i32 1
61  %b2 = extractelement <4 x float> %b, i32 2
62  %b3 = extractelement <4 x float> %b, i32 3
63  %r0 = fadd float %a0, %a1
64  %r1 = fadd float %a2, %a3
65  %r2 = fadd float %b0, %b1
66  %r3 = fadd float %b2, %b3
67  %r00 = insertelement <4 x float> undef, float %r0, i32 0
68  %r01 = insertelement <4 x float>  %r00, float %r1, i32 1
69  %r02 = insertelement <4 x float>  %r01, float %r2, i32 2
70  %r03 = insertelement <4 x float>  %r02, float %r3, i32 3
71  ret <4 x float> %r03
72}
73
; Pairwise adjacent-element add on <2 x i64>, written as scalar
; extractelement / add / insertelement chains:
;   result[0] = a[0] + a[1]
;   result[1] = b[0] + b[1]
; Every RUN configuration shares the common CHECK prefix here and expects an
; even-lane shuffle of (a, b), an odd-lane shuffle of (a, b) and a single
; <2 x i64> add.
74define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) {
75; CHECK-LABEL: @test_v2i64(
76; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 2>
77; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> <i32 1, i32 3>
78; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
79; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
80;
81  %a0 = extractelement <2 x i64> %a, i32 0
82  %a1 = extractelement <2 x i64> %a, i32 1
83  %b0 = extractelement <2 x i64> %b, i32 0
84  %b1 = extractelement <2 x i64> %b, i32 1
85  %r0 = add i64 %a0, %a1
86  %r1 = add i64 %b0, %b1
87  %r00 = insertelement <2 x i64> undef, i64 %r0, i32 0
88  %r01 = insertelement <2 x i64>  %r00, i64 %r1, i32 1
89  ret <2 x i64> %r01
90}
91
; Pairwise adjacent-element add on <4 x i32>, written as scalar
; extractelement / add / insertelement chains:
;   result = < a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] >
; Every RUN configuration shares the common CHECK prefix here and expects an
; even-lane shuffle of (a, b), an odd-lane shuffle of (a, b) and a single
; <4 x i32> add.
92define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) {
93; CHECK-LABEL: @test_v4i32(
94; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
95; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
96; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
97; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
98;
99  %a0 = extractelement <4 x i32> %a, i32 0
100  %a1 = extractelement <4 x i32> %a, i32 1
101  %a2 = extractelement <4 x i32> %a, i32 2
102  %a3 = extractelement <4 x i32> %a, i32 3
103  %b0 = extractelement <4 x i32> %b, i32 0
104  %b1 = extractelement <4 x i32> %b, i32 1
105  %b2 = extractelement <4 x i32> %b, i32 2
106  %b3 = extractelement <4 x i32> %b, i32 3
107  %r0 = add i32 %a0, %a1
108  %r1 = add i32 %a2, %a3
109  %r2 = add i32 %b0, %b1
110  %r3 = add i32 %b2, %b3
111  %r00 = insertelement <4 x i32> undef, i32 %r0, i32 0
112  %r01 = insertelement <4 x i32>  %r00, i32 %r1, i32 1
113  %r02 = insertelement <4 x i32>  %r01, i32 %r2, i32 2
114  %r03 = insertelement <4 x i32>  %r02, i32 %r3, i32 3
115  ret <4 x i32> %r03
116}
117
; Pairwise adjacent-element add on <8 x i16>, written as scalar
; extractelement / add / insertelement chains:
;   result[0..3] = a[0]+a[1], a[2]+a[3], a[4]+a[5], a[6]+a[7]
;   result[4..7] = b[0]+b[1], b[2]+b[3], b[4]+b[5], b[6]+b[7]
; Every RUN configuration shares the common CHECK prefix here and expects an
; even-lane shuffle of (a, b), an odd-lane shuffle of (a, b) and a single
; <8 x i16> add.
118define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
119; CHECK-LABEL: @test_v8i16(
120; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
121; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
122; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
123; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
124;
125  %a0 = extractelement <8 x i16> %a, i32 0
126  %a1 = extractelement <8 x i16> %a, i32 1
127  %a2 = extractelement <8 x i16> %a, i32 2
128  %a3 = extractelement <8 x i16> %a, i32 3
129  %a4 = extractelement <8 x i16> %a, i32 4
130  %a5 = extractelement <8 x i16> %a, i32 5
131  %a6 = extractelement <8 x i16> %a, i32 6
132  %a7 = extractelement <8 x i16> %a, i32 7
133  %b0 = extractelement <8 x i16> %b, i32 0
134  %b1 = extractelement <8 x i16> %b, i32 1
135  %b2 = extractelement <8 x i16> %b, i32 2
136  %b3 = extractelement <8 x i16> %b, i32 3
137  %b4 = extractelement <8 x i16> %b, i32 4
138  %b5 = extractelement <8 x i16> %b, i32 5
139  %b6 = extractelement <8 x i16> %b, i32 6
140  %b7 = extractelement <8 x i16> %b, i32 7
141  %r0 = add i16 %a0, %a1
142  %r1 = add i16 %a2, %a3
143  %r2 = add i16 %a4, %a5
144  %r3 = add i16 %a6, %a7
145  %r4 = add i16 %b0, %b1
146  %r5 = add i16 %b2, %b3
147  %r6 = add i16 %b4, %b5
148  %r7 = add i16 %b6, %b7
149  %r00 = insertelement <8 x i16> undef, i16 %r0, i32 0
150  %r01 = insertelement <8 x i16>  %r00, i16 %r1, i32 1
151  %r02 = insertelement <8 x i16>  %r01, i16 %r2, i32 2
152  %r03 = insertelement <8 x i16>  %r02, i16 %r3, i32 3
153  %r04 = insertelement <8 x i16>  %r03, i16 %r4, i32 4
154  %r05 = insertelement <8 x i16>  %r04, i16 %r5, i32 5
155  %r06 = insertelement <8 x i16>  %r05, i16 %r6, i32 6
156  %r07 = insertelement <8 x i16>  %r06, i16 %r7, i32 7
157  ret <8 x i16> %r07
158}
159
160;
161; 256-bit vectors
162;
163
; Pairwise adjacent-element add on <4 x double>, written as scalar
; extractelement / fadd / insertelement chains. The a/b pairs alternate per
; two-element group:
;   result = < a[0]+a[1], b[0]+b[1], a[2]+a[3], b[2]+b[3] >
; Expected output differs per prefix:
;   - SSE prefix   two <2 x double> fadds (source elements 0/1, then 2/3),
;                  concatenated back to <4 x double> with a shufflevector.
;   - SLM prefix   the scalar code is left unvectorized.
;   - AVX prefix   one <4 x double> fadd fed by two shuffles of (a, b).
164define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
165; SSE-LABEL: @test_v4f64(
166; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
167; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
168; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
169; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
170; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
171; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
172; SSE-NEXT:    [[R03:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
173; SSE-NEXT:    ret <4 x double> [[R03]]
174;
175; SLM-LABEL: @test_v4f64(
176; SLM-NEXT:    [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
177; SLM-NEXT:    [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
178; SLM-NEXT:    [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
179; SLM-NEXT:    [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
180; SLM-NEXT:    [[B0:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0
181; SLM-NEXT:    [[B1:%.*]] = extractelement <4 x double> [[B]], i32 1
182; SLM-NEXT:    [[B2:%.*]] = extractelement <4 x double> [[B]], i32 2
183; SLM-NEXT:    [[B3:%.*]] = extractelement <4 x double> [[B]], i32 3
184; SLM-NEXT:    [[R0:%.*]] = fadd double [[A0]], [[A1]]
185; SLM-NEXT:    [[R1:%.*]] = fadd double [[B0]], [[B1]]
186; SLM-NEXT:    [[R2:%.*]] = fadd double [[A2]], [[A3]]
187; SLM-NEXT:    [[R3:%.*]] = fadd double [[B2]], [[B3]]
188; SLM-NEXT:    [[R00:%.*]] = insertelement <4 x double> undef, double [[R0]], i32 0
189; SLM-NEXT:    [[R01:%.*]] = insertelement <4 x double> [[R00]], double [[R1]], i32 1
190; SLM-NEXT:    [[R02:%.*]] = insertelement <4 x double> [[R01]], double [[R2]], i32 2
191; SLM-NEXT:    [[R03:%.*]] = insertelement <4 x double> [[R02]], double [[R3]], i32 3
192; SLM-NEXT:    ret <4 x double> [[R03]]
193;
194; AVX-LABEL: @test_v4f64(
195; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
196; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
197; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
198; AVX-NEXT:    ret <4 x double> [[TMP3]]
199;
200  %a0 = extractelement <4 x double> %a, i32 0
201  %a1 = extractelement <4 x double> %a, i32 1
202  %a2 = extractelement <4 x double> %a, i32 2
203  %a3 = extractelement <4 x double> %a, i32 3
204  %b0 = extractelement <4 x double> %b, i32 0
205  %b1 = extractelement <4 x double> %b, i32 1
206  %b2 = extractelement <4 x double> %b, i32 2
207  %b3 = extractelement <4 x double> %b, i32 3
208  %r0 = fadd double %a0, %a1
209  %r1 = fadd double %b0, %b1
210  %r2 = fadd double %a2, %a3
211  %r3 = fadd double %b2, %b3
212  %r00 = insertelement <4 x double> undef, double %r0, i32 0
213  %r01 = insertelement <4 x double>  %r00, double %r1, i32 1
214  %r02 = insertelement <4 x double>  %r01, double %r2, i32 2
215  %r03 = insertelement <4 x double>  %r02, double %r3, i32 3
216  ret <4 x double> %r03
217}
218
; Pairwise adjacent-element add on <8 x float>, written as scalar
; extractelement / fadd / insertelement chains. The a/b pairs alternate per
; four-element group:
;   result[0..3] = a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3]
;   result[4..7] = a[4]+a[5], a[6]+a[7], b[4]+b[5], b[6]+b[7]
; Expected output differs per prefix:
;   - SSE and AVX prefixes   one <8 x float> fadd fed by two shuffles of (a, b).
;   - SLM prefix             two <4 x float> fadds, concatenated back to
;                            <8 x float> with a shufflevector.
219define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
220; SSE-LABEL: @test_v8f32(
221; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
222; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
223; SSE-NEXT:    [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
224; SSE-NEXT:    ret <8 x float> [[TMP3]]
225;
226; SLM-LABEL: @test_v8f32(
227; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
228; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
229; SLM-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
230; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
231; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
232; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
233; SLM-NEXT:    [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
234; SLM-NEXT:    ret <8 x float> [[R07]]
235;
236; AVX-LABEL: @test_v8f32(
237; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
238; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
239; AVX-NEXT:    [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
240; AVX-NEXT:    ret <8 x float> [[TMP3]]
241;
242  %a0 = extractelement <8 x float> %a, i32 0
243  %a1 = extractelement <8 x float> %a, i32 1
244  %a2 = extractelement <8 x float> %a, i32 2
245  %a3 = extractelement <8 x float> %a, i32 3
246  %a4 = extractelement <8 x float> %a, i32 4
247  %a5 = extractelement <8 x float> %a, i32 5
248  %a6 = extractelement <8 x float> %a, i32 6
249  %a7 = extractelement <8 x float> %a, i32 7
250  %b0 = extractelement <8 x float> %b, i32 0
251  %b1 = extractelement <8 x float> %b, i32 1
252  %b2 = extractelement <8 x float> %b, i32 2
253  %b3 = extractelement <8 x float> %b, i32 3
254  %b4 = extractelement <8 x float> %b, i32 4
255  %b5 = extractelement <8 x float> %b, i32 5
256  %b6 = extractelement <8 x float> %b, i32 6
257  %b7 = extractelement <8 x float> %b, i32 7
258  %r0 = fadd float %a0, %a1
259  %r1 = fadd float %a2, %a3
260  %r2 = fadd float %b0, %b1
261  %r3 = fadd float %b2, %b3
262  %r4 = fadd float %a4, %a5
263  %r5 = fadd float %a6, %a7
264  %r6 = fadd float %b4, %b5
265  %r7 = fadd float %b6, %b7
266  %r00 = insertelement <8 x float> undef, float %r0, i32 0
267  %r01 = insertelement <8 x float>  %r00, float %r1, i32 1
268  %r02 = insertelement <8 x float>  %r01, float %r2, i32 2
269  %r03 = insertelement <8 x float>  %r02, float %r3, i32 3
270  %r04 = insertelement <8 x float>  %r03, float %r4, i32 4
271  %r05 = insertelement <8 x float>  %r04, float %r5, i32 5
272  %r06 = insertelement <8 x float>  %r05, float %r6, i32 6
273  %r07 = insertelement <8 x float>  %r06, float %r7, i32 7
274  ret <8 x float> %r07
275}
276
; Pairwise adjacent-element add on <4 x i64>, written as scalar
; extractelement / add / insertelement chains. The a/b pairs alternate per
; two-element group:
;   result = < a[0]+a[1], b[0]+b[1], a[2]+a[3], b[2]+b[3] >
; Every RUN configuration shares the common CHECK prefix here and expects a
; single <4 x i64> add fed by two shuffles of (a, b).
277define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
278; CHECK-LABEL: @test_v4i64(
279; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
280; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
281; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
282; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
283;
284  %a0 = extractelement <4 x i64> %a, i32 0
285  %a1 = extractelement <4 x i64> %a, i32 1
286  %a2 = extractelement <4 x i64> %a, i32 2
287  %a3 = extractelement <4 x i64> %a, i32 3
288  %b0 = extractelement <4 x i64> %b, i32 0
289  %b1 = extractelement <4 x i64> %b, i32 1
290  %b2 = extractelement <4 x i64> %b, i32 2
291  %b3 = extractelement <4 x i64> %b, i32 3
292  %r0 = add i64 %a0, %a1
293  %r1 = add i64 %b0, %b1
294  %r2 = add i64 %a2, %a3
295  %r3 = add i64 %b2, %b3
296  %r00 = insertelement <4 x i64> undef, i64 %r0, i32 0
297  %r01 = insertelement <4 x i64>  %r00, i64 %r1, i32 1
298  %r02 = insertelement <4 x i64>  %r01, i64 %r2, i32 2
299  %r03 = insertelement <4 x i64>  %r02, i64 %r3, i32 3
300  ret <4 x i64> %r03
301}
302
; Pairwise adjacent-element add on <8 x i32>, written as scalar
; extractelement / add / insertelement chains. The a/b pairs alternate per
; four-element group:
;   result[0..3] = a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3]
;   result[4..7] = a[4]+a[5], a[6]+a[7], b[4]+b[5], b[6]+b[7]
; Every RUN configuration shares the common CHECK prefix here and expects a
; single <8 x i32> add fed by two shuffles of (a, b).
303define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
304; CHECK-LABEL: @test_v8i32(
305; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
306; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
307; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
308; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
309;
310  %a0 = extractelement <8 x i32> %a, i32 0
311  %a1 = extractelement <8 x i32> %a, i32 1
312  %a2 = extractelement <8 x i32> %a, i32 2
313  %a3 = extractelement <8 x i32> %a, i32 3
314  %a4 = extractelement <8 x i32> %a, i32 4
315  %a5 = extractelement <8 x i32> %a, i32 5
316  %a6 = extractelement <8 x i32> %a, i32 6
317  %a7 = extractelement <8 x i32> %a, i32 7
318  %b0 = extractelement <8 x i32> %b, i32 0
319  %b1 = extractelement <8 x i32> %b, i32 1
320  %b2 = extractelement <8 x i32> %b, i32 2
321  %b3 = extractelement <8 x i32> %b, i32 3
322  %b4 = extractelement <8 x i32> %b, i32 4
323  %b5 = extractelement <8 x i32> %b, i32 5
324  %b6 = extractelement <8 x i32> %b, i32 6
325  %b7 = extractelement <8 x i32> %b, i32 7
326  %r0 = add i32 %a0, %a1
327  %r1 = add i32 %a2, %a3
328  %r2 = add i32 %b0, %b1
329  %r3 = add i32 %b2, %b3
330  %r4 = add i32 %a4, %a5
331  %r5 = add i32 %a6, %a7
332  %r6 = add i32 %b4, %b5
333  %r7 = add i32 %b6, %b7
334  %r00 = insertelement <8 x i32> undef, i32 %r0, i32 0
335  %r01 = insertelement <8 x i32>  %r00, i32 %r1, i32 1
336  %r02 = insertelement <8 x i32>  %r01, i32 %r2, i32 2
337  %r03 = insertelement <8 x i32>  %r02, i32 %r3, i32 3
338  %r04 = insertelement <8 x i32>  %r03, i32 %r4, i32 4
339  %r05 = insertelement <8 x i32>  %r04, i32 %r5, i32 5
340  %r06 = insertelement <8 x i32>  %r05, i32 %r6, i32 6
341  %r07 = insertelement <8 x i32>  %r06, i32 %r7, i32 7
342  ret <8 x i32> %r07
343}
344
; Pairwise adjacent-element add on <16 x i16>, written as scalar
; extractelement / add / insertelement chains. The a/b pairs alternate per
; eight-element group:
;   result[0..3]   = pair sums of a[0..7]
;   result[4..7]   = pair sums of b[0..7]
;   result[8..11]  = pair sums of a[8..15]
;   result[12..15] = pair sums of b[8..15]
; Expected output differs per prefix:
;   - SSE prefix             two <8 x i16> adds, concatenated back to
;                            <16 x i16> with a shufflevector.
;   - SLM and AVX prefixes   one <16 x i16> add fed by two shuffles of (a, b).
345define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
346; SSE-LABEL: @test_v16i16(
347; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22>
348; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23>
349; SSE-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
350; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
351; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
352; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
353; SSE-NEXT:    [[RV15:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
354; SSE-NEXT:    ret <16 x i16> [[RV15]]
355;
356; SLM-LABEL: @test_v16i16(
357; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
358; SLM-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
359; SLM-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
360; SLM-NEXT:    ret <16 x i16> [[TMP3]]
361;
362; AVX-LABEL: @test_v16i16(
363; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
364; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
365; AVX-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
366; AVX-NEXT:    ret <16 x i16> [[TMP3]]
367;
368  %a0  = extractelement <16 x i16> %a, i32 0
369  %a1  = extractelement <16 x i16> %a, i32 1
370  %a2  = extractelement <16 x i16> %a, i32 2
371  %a3  = extractelement <16 x i16> %a, i32 3
372  %a4  = extractelement <16 x i16> %a, i32 4
373  %a5  = extractelement <16 x i16> %a, i32 5
374  %a6  = extractelement <16 x i16> %a, i32 6
375  %a7  = extractelement <16 x i16> %a, i32 7
376  %a8  = extractelement <16 x i16> %a, i32 8
377  %a9  = extractelement <16 x i16> %a, i32 9
378  %a10 = extractelement <16 x i16> %a, i32 10
379  %a11 = extractelement <16 x i16> %a, i32 11
380  %a12 = extractelement <16 x i16> %a, i32 12
381  %a13 = extractelement <16 x i16> %a, i32 13
382  %a14 = extractelement <16 x i16> %a, i32 14
383  %a15 = extractelement <16 x i16> %a, i32 15
384  %b0  = extractelement <16 x i16> %b, i32 0
385  %b1  = extractelement <16 x i16> %b, i32 1
386  %b2  = extractelement <16 x i16> %b, i32 2
387  %b3  = extractelement <16 x i16> %b, i32 3
388  %b4  = extractelement <16 x i16> %b, i32 4
389  %b5  = extractelement <16 x i16> %b, i32 5
390  %b6  = extractelement <16 x i16> %b, i32 6
391  %b7  = extractelement <16 x i16> %b, i32 7
392  %b8  = extractelement <16 x i16> %b, i32 8
393  %b9  = extractelement <16 x i16> %b, i32 9
394  %b10 = extractelement <16 x i16> %b, i32 10
395  %b11 = extractelement <16 x i16> %b, i32 11
396  %b12 = extractelement <16 x i16> %b, i32 12
397  %b13 = extractelement <16 x i16> %b, i32 13
398  %b14 = extractelement <16 x i16> %b, i32 14
399  %b15 = extractelement <16 x i16> %b, i32 15
400  %r0  = add i16 %a0 , %a1
401  %r1  = add i16 %a2 , %a3
402  %r2  = add i16 %a4 , %a5
403  %r3  = add i16 %a6 , %a7
404  %r4  = add i16 %b0 , %b1
405  %r5  = add i16 %b2 , %b3
406  %r6  = add i16 %b4 , %b5
407  %r7  = add i16 %b6 , %b7
408  %r8  = add i16 %a8 , %a9
409  %r9  = add i16 %a10, %a11
410  %r10 = add i16 %a12, %a13
411  %r11 = add i16 %a14, %a15
412  %r12 = add i16 %b8 , %b9
413  %r13 = add i16 %b10, %b11
414  %r14 = add i16 %b12, %b13
415  %r15 = add i16 %b14, %b15
416  %rv0  = insertelement <16 x i16> undef, i16 %r0 , i32 0
417  %rv1  = insertelement <16 x i16> %rv0 , i16 %r1 , i32 1
418  %rv2  = insertelement <16 x i16> %rv1 , i16 %r2 , i32 2
419  %rv3  = insertelement <16 x i16> %rv2 , i16 %r3 , i32 3
420  %rv4  = insertelement <16 x i16> %rv3 , i16 %r4 , i32 4
421  %rv5  = insertelement <16 x i16> %rv4 , i16 %r5 , i32 5
422  %rv6  = insertelement <16 x i16> %rv5 , i16 %r6 , i32 6
423  %rv7  = insertelement <16 x i16> %rv6 , i16 %r7 , i32 7
424  %rv8  = insertelement <16 x i16> %rv7 , i16 %r8 , i32 8
425  %rv9  = insertelement <16 x i16> %rv8 , i16 %r9 , i32 9
426  %rv10 = insertelement <16 x i16> %rv9 , i16 %r10, i32 10
427  %rv11 = insertelement <16 x i16> %rv10, i16 %r11, i32 11
428  %rv12 = insertelement <16 x i16> %rv11, i16 %r12, i32 12
429  %rv13 = insertelement <16 x i16> %rv12, i16 %r13, i32 13
430  %rv14 = insertelement <16 x i16> %rv13, i16 %r14, i32 14
431  %rv15 = insertelement <16 x i16> %rv14, i16 %r15, i32 15
432  ret <16 x i16> %rv15
433}
434