1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
3
4define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5; CHECK-LABEL: ins16bw:
6; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
7  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
8  ret <16 x i8> %tmp3
9}
10
11define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
12; CHECK-LABEL: ins8hw:
13; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
14  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
15  ret <8 x i16> %tmp3
16}
17
18define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
19; CHECK-LABEL: ins4sw:
20; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
21  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
22  ret <4 x i32> %tmp3
23}
24
25define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
26; CHECK-LABEL: ins2dw:
27; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
28  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
29  ret <2 x i64> %tmp3
30}
31
32define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
33; CHECK-LABEL: ins8bw:
34; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
35  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
36  ret <8 x i8> %tmp3
37}
38
39define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
40; CHECK-LABEL: ins4hw:
41; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
42  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
43  ret <4 x i16> %tmp3
44}
45
46define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
47; CHECK-LABEL: ins2sw:
48; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
49  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
50  ret <2 x i32> %tmp3
51}
52
53define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
54; CHECK-LABEL: ins16b16:
55; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
56  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
57  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
58  ret <16 x i8> %tmp4
59}
60
61define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
62; CHECK-LABEL: ins8h8:
63; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
64  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
65  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
66  ret <8 x i16> %tmp4
67}
68
69define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
70; CHECK-LABEL: ins4s4:
71; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
72  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
73  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
74  ret <4 x i32> %tmp4
75}
76
77define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
78; CHECK-LABEL: ins2d2:
79; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
80  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
81  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
82  ret <2 x i64> %tmp4
83}
84
85define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
86; CHECK-LABEL: ins4f4:
87; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
88  %tmp3 = extractelement <4 x float> %tmp1, i32 2
89  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
90  ret <4 x float> %tmp4
91}
92
93define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
94; CHECK-LABEL: ins2df2:
95; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
96  %tmp3 = extractelement <2 x double> %tmp1, i32 0
97  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
98  ret <2 x double> %tmp4
99}
100
101define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
102; CHECK-LABEL: ins8b16:
103; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
104  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
105  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
106  ret <16 x i8> %tmp4
107}
108
109define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
110; CHECK-LABEL: ins4h8:
111; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
112  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
113  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
114  ret <8 x i16> %tmp4
115}
116
117define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
118; CHECK-LABEL: ins2s4:
119; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
120  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
121  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
122  ret <4 x i32> %tmp4
123}
124
125define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
126; CHECK-LABEL: ins1d2:
127; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
128  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
129  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
130  ret <2 x i64> %tmp4
131}
132
133define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
134; CHECK-LABEL: ins2f4:
135; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
136  %tmp3 = extractelement <2 x float> %tmp1, i32 1
137  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
138  ret <4 x float> %tmp4
139}
140
141define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
142; CHECK-LABEL: ins1f2:
143; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
144  %tmp3 = extractelement <1 x double> %tmp1, i32 0
145  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
146  ret <2 x double> %tmp4
147}
148
149define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
150; CHECK-LABEL: ins16b8:
151; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
152  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
153  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
154  ret <8 x i8> %tmp4
155}
156
157define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
158; CHECK-LABEL: ins8h4:
159; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
160  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
161  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
162  ret <4 x i16> %tmp4
163}
164
165define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
166; CHECK-LABEL: ins4s2:
167; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
168  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
169  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
170  ret <2 x i32> %tmp4
171}
172
173define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
174; CHECK-LABEL: ins2d1:
175; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
176  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
177  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
178  ret <1 x i64> %tmp4
179}
180
181define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
182; CHECK-LABEL: ins4f2:
183; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
184  %tmp3 = extractelement <4 x float> %tmp1, i32 2
185  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
186  ret <2 x float> %tmp4
187}
188
; Extracts lane 1 of a <2 x double> and inserts it as the only element
; (index 0) of a <1 x double>. Unlike the neighbouring insert tests, the
; expected output is a scalar "mov dN, vM.d[1]" rather than an "ins".
189define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
190; CHECK-LABEL: ins2f1:
191; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
192  %tmp3 = extractelement <2 x double> %tmp1, i32 1
193  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
194  ret <1 x double> %tmp4
195}
196
197define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
198; CHECK-LABEL: ins8b8:
199; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
200  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
201  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
202  ret <8 x i8> %tmp4
203}
204
205define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
206; CHECK-LABEL: ins4h4:
207; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
208  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
209  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
210  ret <4 x i16> %tmp4
211}
212
213define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
214; CHECK-LABEL: ins2s2:
215; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
216  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
217  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
218  ret <2 x i32> %tmp4
219}
220
221define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
222; CHECK-LABEL: ins1d1:
223; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
224  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
225  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
226  ret <1 x i64> %tmp4
227}
228
229define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
230; CHECK-LABEL: ins2f2:
231; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
232  %tmp3 = extractelement <2 x float> %tmp1, i32 0
233  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
234  ret <2 x float> %tmp4
235}
236
; Extracts element 0 of one <1 x double> and inserts it at index 0 of another
; <1 x double>. This is a negative test: the CHECK-NOT directive requires that
; no "ins vN" instruction appears after the function label.
237define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
238; CHECK-LABEL: ins1df1:
239; CHECK-NOT: ins {{v[0-9]+}}
240  %tmp3 = extractelement <1 x double> %tmp1, i32 0
241  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
242  ret <1 x double> %tmp4
243}
244
245define i32 @umovw16b(<16 x i8> %tmp1) {
246; CHECK-LABEL: umovw16b:
247; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
248  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
249  %tmp4 = zext i8 %tmp3 to i32
250  ret i32 %tmp4
251}
252
253define i32 @umovw8h(<8 x i16> %tmp1) {
254; CHECK-LABEL: umovw8h:
255; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
256  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
257  %tmp4 = zext i16 %tmp3 to i32
258  ret i32 %tmp4
259}
260
261define i32 @umovw4s(<4 x i32> %tmp1) {
262; CHECK-LABEL: umovw4s:
263; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
264  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
265  ret i32 %tmp3
266}
267
268define i64 @umovx2d(<2 x i64> %tmp1) {
269; CHECK-LABEL: umovx2d:
270; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
271  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
272  ret i64 %tmp3
273}
274
; Zero-extending extract of byte lane 7 from a 64-bit <8 x i8> vector to i32.
; The check names "umov" explicitly: FileCheck matches substrings, so a bare
; "mov" pattern would accept both "umov" and "smov" and could not tell a
; zero-extending move from a sign-extending one.
275define i32 @umovw8b(<8 x i8> %tmp1) {
276; CHECK-LABEL: umovw8b:
277; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
278  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
279  %tmp4 = zext i8 %tmp3 to i32
280  ret i32 %tmp4
281}
282
; Zero-extending extract of halfword lane 2 from a 64-bit <4 x i16> vector to
; i32. The check names "umov" explicitly: FileCheck matches substrings, so a
; bare "mov" pattern would accept both "umov" and "smov" and could not tell a
; zero-extending move from a sign-extending one.
283define i32 @umovw4h(<4 x i16> %tmp1) {
284; CHECK-LABEL: umovw4h:
285; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
286  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
287  %tmp4 = zext i16 %tmp3 to i32
288  ret i32 %tmp4
289}
290
291define i32 @umovw2s(<2 x i32> %tmp1) {
292; CHECK-LABEL: umovw2s:
293; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
294  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
295  ret i32 %tmp3
296}
297
; Extracts the only element of a <1 x i64> as a scalar i64. The expected
; output is an "fmov xN, dM" rather than a lane-indexed move.
298define i64 @umovx1d(<1 x i64> %tmp1) {
299; CHECK-LABEL: umovx1d:
300; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
301  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
302  ret i64 %tmp3
303}
304
305define i32 @smovw16b(<16 x i8> %tmp1) {
306; CHECK-LABEL: smovw16b:
307; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
308  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
309  %tmp4 = sext i8 %tmp3 to i32
310  %tmp5 = add i32 %tmp4, %tmp4
311  ret i32 %tmp5
312}
313
314define i32 @smovw8h(<8 x i16> %tmp1) {
315; CHECK-LABEL: smovw8h:
316; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
317  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
318  %tmp4 = sext i16 %tmp3 to i32
319  %tmp5 = add i32 %tmp4, %tmp4
320  ret i32 %tmp5
321}
322
323define i64 @smovx16b(<16 x i8> %tmp1) {
324; CHECK-LABEL: smovx16b:
325; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
326  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
327  %tmp4 = sext i8 %tmp3 to i64
328  ret i64 %tmp4
329}
330
331define i64 @smovx8h(<8 x i16> %tmp1) {
332; CHECK-LABEL: smovx8h:
333; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
334  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
335  %tmp4 = sext i16 %tmp3 to i64
336  ret i64 %tmp4
337}
338
339define i64 @smovx4s(<4 x i32> %tmp1) {
340; CHECK-LABEL: smovx4s:
341; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
342  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
343  %tmp4 = sext i32 %tmp3 to i64
344  ret i64 %tmp4
345}
346
347define i32 @smovw8b(<8 x i8> %tmp1) {
348; CHECK-LABEL: smovw8b:
349; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
350  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
351  %tmp4 = sext i8 %tmp3 to i32
352  %tmp5 = add i32 %tmp4, %tmp4
353  ret i32 %tmp5
354}
355
356define i32 @smovw4h(<4 x i16> %tmp1) {
357; CHECK-LABEL: smovw4h:
358; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
359  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
360  %tmp4 = sext i16 %tmp3 to i32
361  %tmp5 = add i32 %tmp4, %tmp4
362  ret i32 %tmp5
363}
364
; Sign-extending extract of byte lane 6 from a <8 x i8> vector.
; Note: despite the "x" in the name, the function returns i32, and the check
; accepts either an x or a w destination register via the [xw] class.
365define i32 @smovx8b(<8 x i8> %tmp1) {
366; CHECK-LABEL: smovx8b:
367; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
368  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
369  %tmp4 = sext i8 %tmp3 to i32
370  ret i32 %tmp4
371}
372
; Sign-extending extract of halfword lane 2 from a <4 x i16> vector.
; Note: despite the "x" in the name, the function returns i32, and the check
; accepts either an x or a w destination register via the [xw] class.
373define i32 @smovx4h(<4 x i16> %tmp1) {
374; CHECK-LABEL: smovx4h:
375; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
376  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
377  %tmp4 = sext i16 %tmp3 to i32
378  ret i32 %tmp4
379}
380
381define i64 @smovx2s(<2 x i32> %tmp1) {
382; CHECK-LABEL: smovx2s:
383; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
384  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
385  %tmp4 = sext i32 %tmp3 to i64
386  ret i64 %tmp4
387}
388
; Single-lane copy expressed as a shuffle: the mask is the identity on %v1
; except result lane 5, whose index 11 selects lane 3 of %v2 (11 - 8).
; Expected as one "ins" from .b[3] into .b[5].
389define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
390; CHECK-LABEL: test_vcopy_lane_s8:
391; CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
392  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
393  ret <8 x i8> %vset_lane
394}
395
; Single-lane copy expressed as a shuffle: the mask is the identity on %v1
; except result lane 14, whose index 22 selects lane 6 of %v2 (22 - 16).
; Expected as one "ins" from .b[6] into .b[14].
396define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
397; CHECK-LABEL: test_vcopyq_laneq_s8:
398; CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
399  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
400  ret <16 x i8> %vset_lane
401}
402
; "Swapped" single-lane copy: mask indices 8..14 take lanes 0..6 from %v2 in
; place, and only result lane 7 comes from %v1 (index 0 = lane 0 of %v1).
; Expected as one "ins" from .b[0] into .b[7].
403define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
404; CHECK-LABEL: test_vcopy_lane_swap_s8:
405; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
406  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
407  ret <8 x i8> %vset_lane
408}
409
; "Swapped" single-lane copy: mask indices 17..31 take lanes 1..15 from %v2
; in place, and only result lane 0 comes from %v1 (index 15 = lane 15 of %v1).
; Expected as one "ins" from .b[15] into .b[0].
410define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
411; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
412; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
413  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
414  ret <16 x i8> %vset_lane
415}
416
417define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
418; CHECK-LABEL: test_vdup_n_u8:
419; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
420  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
421  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
422  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
423  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
424  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
425  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
426  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
427  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
428  ret <8 x i8> %vecinit7.i
429}
430
431define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
432; CHECK-LABEL: test_vdup_n_u16:
433; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
434  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
435  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
436  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
437  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
438  ret <4 x i16> %vecinit3.i
439}
440
441define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
442; CHECK-LABEL: test_vdup_n_u32:
443; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
444  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
445  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
446  ret <2 x i32> %vecinit1.i
447}
448
; Builds a <1 x i64> from a scalar i64 with a single insertelement into undef.
; Unlike the other test_vdup_n_* tests, the expected output is an
; "fmov dN, xM" rather than a "dup".
449define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
450; CHECK-LABEL: test_vdup_n_u64:
451; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
452  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
453  ret <1 x i64> %vecinit.i
454}
455
456define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
457; CHECK-LABEL: test_vdupq_n_u8:
458; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
459  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
460  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
461  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
462  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
463  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
464  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
465  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
466  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
467  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
468  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
469  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
470  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
471  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
472  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
473  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
474  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
475  ret <16 x i8> %vecinit15.i
476}
477
478define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
479; CHECK-LABEL: test_vdupq_n_u16:
480; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
481  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
482  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
483  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
484  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
485  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
486  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
487  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
488  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
489  ret <8 x i16> %vecinit7.i
490}
491
492define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
493; CHECK-LABEL: test_vdupq_n_u32:
494; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
495  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
496  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
497  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
498  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
499  ret <4 x i32> %vecinit3.i
500}
501
502define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
503; CHECK-LABEL: test_vdupq_n_u64:
504; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
505  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
506  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
507  ret <2 x i64> %vecinit1.i
508}
509
510define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
511; CHECK-LABEL: test_vdup_lane_s8:
512; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
513  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
514  ret <8 x i8> %shuffle
515}
516
517define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
518; CHECK-LABEL: test_vdup_lane_s16:
519; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
520  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
521  ret <4 x i16> %shuffle
522}
523
524define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
525; CHECK-LABEL: test_vdup_lane_s32:
526; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
527  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
528  ret <2 x i32> %shuffle
529}
530
; Broadcasts lane 5 of a 64-bit <8 x i8> source into all 16 lanes of a
; <16 x i8> result via an all-5 shuffle mask.
; The check names the "dup" mnemonic so that an operand-only match on some
; other instruction cannot satisfy it, in line with the test_vdup_lane_s8
; and test_vdupq_laneq_s8 checks in this file.
531define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
532; CHECK-LABEL: test_vdupq_lane_s8:
533; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
534  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
535  ret <16 x i8> %shuffle
536}
537
; Broadcasts lane 2 of a 64-bit <4 x i16> source into all 8 lanes of a
; <8 x i16> result via an all-2 shuffle mask.
; The check names the "dup" mnemonic so that an operand-only match on some
; other instruction cannot satisfy it, in line with the test_vdup_lane_s16
; check in this file.
538define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
539; CHECK-LABEL: test_vdupq_lane_s16:
540; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
541  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
542  ret <8 x i16> %shuffle
543}
544
; Broadcasts lane 1 of a 64-bit <2 x i32> source into all 4 lanes of a
; <4 x i32> result via an all-1 shuffle mask.
; The check names the "dup" mnemonic so that an operand-only match on some
; other instruction cannot satisfy it, in line with the test_vdup_lane_s32
; check in this file.
545define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
546; CHECK-LABEL: test_vdupq_lane_s32:
547; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
548  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
549  ret <4 x i32> %shuffle
550}
551
; Broadcasts the only element of a <1 x i64> source into both lanes of a
; <2 x i64> result (zeroinitializer mask selects index 0 for every lane).
; The check names the "dup" mnemonic so that an operand-only match on some
; other instruction cannot satisfy it, in line with the test_vdupq_laneq_s64
; check in this file.
552define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
553; CHECK-LABEL: test_vdupq_lane_s64:
554; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
555  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
556  ret <2 x i64> %shuffle
557}
558
559define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
560; CHECK-LABEL: test_vdup_laneq_s8:
561; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
562  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
563  ret <8 x i8> %shuffle
564}
565
566define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
567; CHECK-LABEL: test_vdup_laneq_s16:
568; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
569  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
570  ret <4 x i16> %shuffle
571}
572
573define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
574; CHECK-LABEL: test_vdup_laneq_s32:
575; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
576  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
577  ret <2 x i32> %shuffle
578}
579
580define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
581; CHECK-LABEL: test_vdupq_laneq_s8:
582; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
583  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
584  ret <16 x i8> %shuffle
585}
586
; Broadcasts lane 2 of a 128-bit <8 x i16> source into all 8 lanes of the
; <8 x i16> result via an all-2 shuffle mask.
; The check names the "dup" mnemonic so that an operand-only match on some
; other instruction cannot satisfy it, in line with the test_vdupq_laneq_s8
; and test_vdupq_laneq_s32 checks in this file.
587define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
588; CHECK-LABEL: test_vdupq_laneq_s16:
589; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
590  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
591  ret <8 x i16> %shuffle
592}
593
594define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
595; CHECK-LABEL: test_vdupq_laneq_s32:
596; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
597  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
598  ret <4 x i32> %shuffle
599}
600
601define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
602; CHECK-LABEL: test_vdupq_laneq_s64:
603; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
604  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
605  ret <2 x i64> %shuffle
606}
607
608define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
609; CHECK-LABEL: test_bitcastv8i8toi64:
610   %res = bitcast <8 x i8> %in to i64
611; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
612   ret i64 %res
613}
614
615define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
616; CHECK-LABEL: test_bitcastv4i16toi64:
617   %res = bitcast <4 x i16> %in to i64
618; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
619   ret i64 %res
620}
621
622define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
623; CHECK-LABEL: test_bitcastv2i32toi64:
624   %res = bitcast <2 x i32> %in to i64
625; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
626   ret i64 %res
627}
628
629define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
630; CHECK-LABEL: test_bitcastv2f32toi64:
631   %res = bitcast <2 x float> %in to i64
632; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
633   ret i64 %res
634}
635
636define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
637; CHECK-LABEL: test_bitcastv1i64toi64:
638   %res = bitcast <1 x i64> %in to i64
639; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
640   ret i64 %res
641}
642
643define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
644; CHECK-LABEL: test_bitcastv1f64toi64:
645   %res = bitcast <1 x double> %in to i64
646; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
647   ret i64 %res
648}
649
650define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
651; CHECK-LABEL: test_bitcasti64tov8i8:
652   %res = bitcast i64 %in to <8 x i8>
653; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
654   ret <8 x i8> %res
655}
656
657define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
658; CHECK-LABEL: test_bitcasti64tov4i16:
659   %res = bitcast i64 %in to <4 x i16>
660; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
661   ret <4 x i16> %res
662}
663
664define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
665; CHECK-LABEL: test_bitcasti64tov2i32:
666   %res = bitcast i64 %in to <2 x i32>
667; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
668   ret <2 x i32> %res
669}
670
671define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
672; CHECK-LABEL: test_bitcasti64tov2f32:
673   %res = bitcast i64 %in to <2 x float>
674; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
675   ret <2 x float> %res
676}
677
678define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
679; CHECK-LABEL: test_bitcasti64tov1i64:
680   %res = bitcast i64 %in to <1 x i64>
681; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
682   ret <1 x i64> %res
683}
684
685define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
686; CHECK-LABEL: test_bitcasti64tov1f64:
687   %res = bitcast i64 %in to <1 x double>
688; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
689   ret <1 x double> %res
690}
691
692define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
693; CHECK-LABEL: test_bitcastv8i8tov1f64:
694; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
695; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
696  %sub.i = sub <8 x i8> zeroinitializer, %a
697  %1 = bitcast <8 x i8> %sub.i to <1 x double>
698  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
699  ret <1 x i64> %vcvt.i
700}
701
702define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
703; CHECK-LABEL: test_bitcastv4i16tov1f64:
704; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
705; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
706  %sub.i = sub <4 x i16> zeroinitializer, %a
707  %1 = bitcast <4 x i16> %sub.i to <1 x double>
708  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
709  ret <1 x i64> %vcvt.i
710}
711
712define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
713; CHECK-LABEL: test_bitcastv2i32tov1f64:
714; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
715; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
716  %sub.i = sub <2 x i32> zeroinitializer, %a
717  %1 = bitcast <2 x i32> %sub.i to <1 x double>
718  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
719  ret <1 x i64> %vcvt.i
720}
721
722define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
723; CHECK-LABEL: test_bitcastv1i64tov1f64:
724; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
725; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
726  %sub.i = sub <1 x i64> zeroinitializer, %a
727  %1 = bitcast <1 x i64> %sub.i to <1 x double>
728  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
729  ret <1 x i64> %vcvt.i
730}
731
732define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
733; CHECK-LABEL: test_bitcastv2f32tov1f64:
734; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
735; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
736  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
737  %1 = bitcast <2 x float> %sub.i to <1 x double>
738  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
739  ret <1 x i64> %vcvt.i
740}
741
742define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
743; CHECK-LABEL: test_bitcastv1f64tov8i8:
744; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
745; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
746  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
747  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
748  %sub.i = sub <8 x i8> zeroinitializer, %1
749  ret <8 x i8> %sub.i
750}
751
752define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
753; CHECK-LABEL: test_bitcastv1f64tov4i16:
754; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
755; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
756  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
757  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
758  %sub.i = sub <4 x i16> zeroinitializer, %1
759  ret <4 x i16> %sub.i
760}
761
762define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
763; CHECK-LABEL: test_bitcastv1f64tov2i32:
764; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
765; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
766  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
767  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
768  %sub.i = sub <2 x i32> zeroinitializer, %1
769  ret <2 x i32> %sub.i
770}
771
772define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
773; CHECK-LABEL: test_bitcastv1f64tov1i64:
774; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
775; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
776  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
777  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
778  %sub.i = sub <1 x i64> zeroinitializer, %1
779  ret <1 x i64> %sub.i
780}
781
782define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
783; CHECK-LABEL: test_bitcastv1f64tov2f32:
784; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
785; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
786  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
787  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
788  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
789  ret <2 x float> %sub.i
790}
791
792; Test insert element into an undef vector
793define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
794; CHECK-LABEL: scalar_to_vector.v8i8:
795; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
796  %b = insertelement <8 x i8> undef, i8 %a, i32 0
797  ret <8 x i8> %b
798}
799
800define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
801; CHECK-LABEL: scalar_to_vector.v16i8:
802; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
803  %b = insertelement <16 x i8> undef, i8 %a, i32 0
804  ret <16 x i8> %b
805}
806
807define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
808; CHECK-LABEL: scalar_to_vector.v4i16:
809; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
810  %b = insertelement <4 x i16> undef, i16 %a, i32 0
811  ret <4 x i16> %b
812}
813
814define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
815; CHECK-LABEL: scalar_to_vector.v8i16:
816; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
817  %b = insertelement <8 x i16> undef, i16 %a, i32 0
818  ret <8 x i16> %b
819}
820
821define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
822; CHECK-LABEL: scalar_to_vector.v2i32:
823; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
824  %b = insertelement <2 x i32> undef, i32 %a, i32 0
825  ret <2 x i32> %b
826}
827
828define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
829; CHECK-LABEL: scalar_to_vector.v4i32:
830; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
831  %b = insertelement <4 x i32> undef, i32 %a, i32 0
832  ret <4 x i32> %b
833}
834
835define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
836; CHECK-LABEL: scalar_to_vector.v2i64:
837; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
838  %b = insertelement <2 x i64> undef, i64 %a, i32 0
839  ret <2 x i64> %b
840}
841
; Extracts the only element of a <1 x i8> and inserts it into every lane
; (0..7) of an <8 x i8>, expecting the whole chain to become a single lane
; "dup". Note that the check hard-codes register v0 for both operands rather
; than using a register regex like most other checks in this file.
842define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
843; CHECK-LABEL: testDUP.v1i8:
844; CHECK: dup v0.8b, v0.b[0]
845  %b = extractelement <1 x i8> %a, i32 0
846  %c = insertelement <8 x i8> undef, i8 %b, i32 0
847  %d = insertelement <8 x i8> %c, i8 %b, i32 1
848  %e = insertelement <8 x i8> %d, i8 %b, i32 2
849  %f = insertelement <8 x i8> %e, i8 %b, i32 3
850  %g = insertelement <8 x i8> %f, i8 %b, i32 4
851  %h = insertelement <8 x i8> %g, i8 %b, i32 5
852  %i = insertelement <8 x i8> %h, i8 %b, i32 6
853  %j = insertelement <8 x i8> %i, i8 %b, i32 7
854  ret <8 x i8> %j
855}
856
; The only element of a <1 x i16> is written into every lane of an <8 x i16>;
; the expected selection is one lane-indexed dup from halfword lane 0.
define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %elt = extractelement <1 x i16> %a, i32 0
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %lane1 = insertelement <8 x i16> %lane0, i16 %elt, i32 1
  %lane2 = insertelement <8 x i16> %lane1, i16 %elt, i32 2
  %lane3 = insertelement <8 x i16> %lane2, i16 %elt, i32 3
  %lane4 = insertelement <8 x i16> %lane3, i16 %elt, i32 4
  %lane5 = insertelement <8 x i16> %lane4, i16 %elt, i32 5
  %lane6 = insertelement <8 x i16> %lane5, i16 %elt, i32 6
  %lane7 = insertelement <8 x i16> %lane6, i16 %elt, i32 7
  ret <8 x i16> %lane7
}
871
; The only element of a <1 x i32> is written into every lane of a <4 x i32>;
; the expected selection is one lane-indexed dup from word lane 0.
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %elt = extractelement <1 x i32> %a, i32 0
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %elt, i32 2
  %lane3 = insertelement <4 x i32> %lane2, i32 %elt, i32 3
  ret <4 x i32> %lane3
}
882
; Rebuilds the low eight bytes of a <16 x i8> lane by lane into an <8 x i8>.
; The only output requirement stated here is that a ret is emitted.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %x0 = extractelement <16 x i8> %x, i32 0
  %v0 = insertelement <8 x i8> undef, i8 %x0, i32 0
  %x1 = extractelement <16 x i8> %x, i32 1
  %v1 = insertelement <8 x i8> %v0, i8 %x1, i32 1
  %x2 = extractelement <16 x i8> %x, i32 2
  %v2 = insertelement <8 x i8> %v1, i8 %x2, i32 2
  %x3 = extractelement <16 x i8> %x, i32 3
  %v3 = insertelement <8 x i8> %v2, i8 %x3, i32 3
  %x4 = extractelement <16 x i8> %x, i32 4
  %v4 = insertelement <8 x i8> %v3, i8 %x4, i32 4
  %x5 = extractelement <16 x i8> %x, i32 5
  %v5 = insertelement <8 x i8> %v4, i8 %x5, i32 5
  %x6 = extractelement <16 x i8> %x, i32 6
  %v6 = insertelement <8 x i8> %v5, i8 %x6, i32 6
  %x7 = extractelement <16 x i8> %x, i32 7
  %v7 = insertelement <8 x i8> %v6, i8 %x7, i32 7
  ret <8 x i8> %v7
}
904
; Lane 0 of the result comes from a variable-index extract (%idx); lanes 1-3
; are constant-index copies. The expected code stores the source vector,
; reloads the selected halfword, then fills the remaining lanes with ins.
; CHECK-LABEL: test_extracts_inserts_varidx_extract:
; CHECK: str q0
; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
  %picked = extractelement <8 x i16> %x, i32 %idx
  %out0 = insertelement <4 x i16> undef, i16 %picked, i32 0
  %x1 = extractelement <8 x i16> %x, i32 1
  %out1 = insertelement <4 x i16> %out0, i16 %x1, i32 1
  %x2 = extractelement <8 x i16> %x, i32 2
  %out2 = insertelement <4 x i16> %out1, i16 %x2, i32 2
  %x3 = extractelement <8 x i16> %x, i32 3
  %out3 = insertelement <4 x i16> %out2, i16 %x3, i32 3
  ret <4 x i16> %out3
}
923
; Source lane 0 is inserted at a variable position (%idx) of an undef vector;
; lanes 1-3 are then overwritten with constant-index copies. The expected code
; stores the halfword at the indexed slot, reloads the d-register and fills
; the remaining lanes with ins.
; CHECK-LABEL: test_extracts_inserts_varidx_insert:
; CHECK: str h0, [{{.*}}, w0, sxtw #1]
; CHECK-DAG: ldr d[[R:[0-9]+]]
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
  %x0 = extractelement <8 x i16> %x, i32 0
  %out0 = insertelement <4 x i16> undef, i16 %x0, i32 %idx
  %x1 = extractelement <8 x i16> %x, i32 1
  %out1 = insertelement <4 x i16> %out0, i16 %x1, i32 1
  %x2 = extractelement <8 x i16> %x, i32 2
  %out2 = insertelement <4 x i16> %out1, i16 %x2, i32 2
  %x3 = extractelement <8 x i16> %x, i32 3
  %out3 = insertelement <4 x i16> %out2, i16 %x3, i32 3
  ret <4 x i16> %out3
}
941
; Word lane 1 of a <2 x i32> is truncated to i16 and splatted over four lanes;
; the expected result is a dup from halfword lane 2 of the same register.
define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %wide = extractelement <2 x i32> %a, i32 1
  %narrow = trunc i32 %wide to i16
  %splat0 = insertelement <4 x i16> undef, i16 %narrow, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %narrow, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %narrow, i32 2
  %splat3 = insertelement <4 x i16> %splat2, i16 %narrow, i32 3
  ret <4 x i16> %splat3
}
954
; Word lane 3 of a <4 x i32> is truncated to i16 and splatted over eight
; lanes; the expected result is a dup from halfword lane 6.
define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %wide = extractelement <4 x i32> %a, i32 3
  %narrow = trunc i32 %wide to i16
  %splat0 = insertelement <8 x i16> undef, i16 %narrow, i32 0
  %splat1 = insertelement <8 x i16> %splat0, i16 %narrow, i32 1
  %splat2 = insertelement <8 x i16> %splat1, i16 %narrow, i32 2
  %splat3 = insertelement <8 x i16> %splat2, i16 %narrow, i32 3
  %splat4 = insertelement <8 x i16> %splat3, i16 %narrow, i32 4
  %splat5 = insertelement <8 x i16> %splat4, i16 %narrow, i32 5
  %splat6 = insertelement <8 x i16> %splat5, i16 %narrow, i32 6
  %splat7 = insertelement <8 x i16> %splat6, i16 %narrow, i32 7
  ret <8 x i16> %splat7
}
971
; The single i64 lane is truncated to i16 and splatted over four lanes; the
; expected result is a dup from halfword lane 0.
define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %wide = extractelement <1 x i64> %a, i32 0
  %narrow = trunc i64 %wide to i16
  %splat0 = insertelement <4 x i16> undef, i16 %narrow, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %narrow, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %narrow, i32 2
  %splat3 = insertelement <4 x i16> %splat2, i16 %narrow, i32 3
  ret <4 x i16> %splat3
}
984
; The single i64 lane is truncated to i32 and splatted over two lanes; the
; expected result is a dup from word lane 0.
define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %wide = extractelement <1 x i64> %a, i32 0
  %narrow = trunc i64 %wide to i32
  %splat0 = insertelement <2 x i32> undef, i32 %narrow, i32 0
  %splat1 = insertelement <2 x i32> %splat0, i32 %narrow, i32 1
  ret <2 x i32> %splat1
}
995
; Doubleword lane 1 of a <2 x i64> is truncated to i16 and splatted over
; eight lanes; the expected result is a dup from halfword lane 4.
define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v8i16:
; CHECK: dup v0.8h, v0.h[4]
entry:
  %wide = extractelement <2 x i64> %a, i32 1
  %narrow = trunc i64 %wide to i16
  %splat0 = insertelement <8 x i16> undef, i16 %narrow, i32 0
  %splat1 = insertelement <8 x i16> %splat0, i16 %narrow, i32 1
  %splat2 = insertelement <8 x i16> %splat1, i16 %narrow, i32 2
  %splat3 = insertelement <8 x i16> %splat2, i16 %narrow, i32 3
  %splat4 = insertelement <8 x i16> %splat3, i16 %narrow, i32 4
  %splat5 = insertelement <8 x i16> %splat4, i16 %narrow, i32 5
  %splat6 = insertelement <8 x i16> %splat5, i16 %narrow, i32 6
  %splat7 = insertelement <8 x i16> %splat6, i16 %narrow, i32 7
  ret <8 x i16> %splat7
}
1012
; Doubleword lane 1 of a <2 x i64> is truncated to i32 and splatted over four
; lanes; the expected result is a dup from word lane 2.
define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v4i32:
; CHECK: dup v0.4s, v0.s[2]
entry:
  %wide = extractelement <2 x i64> %a, i32 1
  %narrow = trunc i64 %wide to i32
  %splat0 = insertelement <4 x i32> undef, i32 %narrow, i32 0
  %splat1 = insertelement <4 x i32> %splat0, i32 %narrow, i32 1
  %splat2 = insertelement <4 x i32> %splat1, i32 %narrow, i32 2
  %splat3 = insertelement <4 x i32> %splat2, i32 %narrow, i32 3
  ret <4 x i32> %splat3
}
1025
; Word lane 1 of a 128-bit <4 x i32> is truncated to i16 and splatted into a
; 64-bit <4 x i16>; the expected result is a dup from halfword lane 2.
define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %wide = extractelement <4 x i32> %a, i32 1
  %narrow = trunc i32 %wide to i16
  %splat0 = insertelement <4 x i16> undef, i16 %narrow, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %narrow, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %narrow, i32 2
  %splat3 = insertelement <4 x i16> %splat2, i16 %narrow, i32 3
  ret <4 x i16> %splat3
}
1038
; Doubleword lane 0 of a <2 x i64> is truncated to i16 and splatted into a
; <4 x i16>; the expected result is a dup from halfword lane 0.
define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %wide = extractelement <2 x i64> %a, i32 0
  %narrow = trunc i64 %wide to i16
  %splat0 = insertelement <4 x i16> undef, i16 %narrow, i32 0
  %splat1 = insertelement <4 x i16> %splat0, i16 %narrow, i32 1
  %splat2 = insertelement <4 x i16> %splat1, i16 %narrow, i32 2
  %splat3 = insertelement <4 x i16> %splat2, i16 %narrow, i32 3
  ret <4 x i16> %splat3
}
1051
; Doubleword lane 0 of a <2 x i64> is truncated to i32 and splatted into a
; <2 x i32>; the expected result is a dup from word lane 0.
define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %wide = extractelement <2 x i64> %a, i32 0
  %narrow = trunc i64 %wide to i32
  %splat0 = insertelement <2 x i32> undef, i32 %narrow, i32 0
  %splat1 = insertelement <2 x i32> %splat0, i32 %narrow, i32 1
  ret <2 x i32> %splat1
}
1062
1063
; The fmaxv result is wrapped in a <1 x float>, unwrapped again, and placed in
; lane 0 of a <2 x float>. The expected output is the fmaxp immediately
; followed by ret, i.e. no extra instruction for the wrap/unwrap/insert.
define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
; CHECK-NEXT: ret
entry:
  %max = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
  %wrapped = insertelement <1 x float> undef, float %max, i32 0
  %unwrapped = extractelement <1 x float> %wrapped, i32 0
  %res = insertelement <2 x float> undef, float %unwrapped, i32 0
  ret <2 x float> %res
}
1075
; Same shape as the v2f32 variant, but the scalar lands in lane 0 of a
; <4 x float>. The expected output is the fmaxp immediately followed by ret.
define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
; CHECK-NEXT: ret
entry:
  %max = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
  %wrapped = insertelement <1 x float> undef, float %max, i32 0
  %unwrapped = extractelement <1 x float> %wrapped, i32 0
  %res = insertelement <4 x float> undef, float %unwrapped, i32 0
  ret <4 x float> %res
}
1087
1088declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
1089
; Lane 0 of %a is written to lane 1 of an undef <2 x i32> (lane 0 stays
; undef); the expected selection is a dup of word lane 0.
define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
; CHECK-LABEL: test_concat_undef_v1i32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %lo = extractelement <2 x i32> %a, i32 0
  %res = insertelement <2 x i32> undef, i32 %lo, i32 1
  ret <2 x i32> %res
}
1098
1099declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
1100
; The scalar sqabs result is placed in lane 0 of an undef <2 x i32>; the
; expected output is the sqabs immediately followed by ret.
define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
; CHECK-LABEL: test_concat_v1i32_undef:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK-NEXT: ret
entry:
  %abs = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
  %res = insertelement <2 x i32> undef, i32 %abs, i32 0
  ret <2 x i32> %res
}
1110
; Lane 0 of %a is written to both lanes of the result; the expected selection
; is a dup of word lane 0.
define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
; CHECK-LABEL: test_concat_same_v1i32_v1i32:
; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
entry:
  %lo = extractelement <2 x i32> %a, i32 0
  %splat0 = insertelement <2 x i32> undef, i32 %lo, i32 0
  %splat1 = insertelement <2 x i32> %splat0, i32 %lo, i32 1
  ret <2 x i32> %splat1
}
1120
; Two independent sqabs results are each put in lane 0 of their own vector and
; the two lane-0 values are then combined by a shuffle. The expected output is
; two sqabs instructions followed by an ins into word lane 1.
define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
entry:
  %abs.a = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
  %vec.a = insertelement <2 x i32> undef, i32 %abs.a, i32 0
  %abs.b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
  %vec.b = insertelement <2 x i32> undef, i32 %abs.b, i32 0
  %cat = shufflevector <2 x i32> %vec.a, <2 x i32> %vec.b, <2 x i32> <i32 0, i32 2>
  ret <2 x i32> %cat
}
1134
; The shuffle mask takes bytes 0-7 of %x followed by bytes 0-7 of %y; the
; expected selection is one ins copying d[0] into d[1].
define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %cat = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %cat
}
1142
; The eight bytes of %x are copied one by one into the low half of a
; <16 x i8>, then a shuffle appends bytes 0-7 of %y. The expected selection is
; one ins copying d[0] into d[1].
define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <8 x i8> %x, i32 0
  %v0 = insertelement <16 x i8> undef, i8 %x0, i32 0
  %x1 = extractelement <8 x i8> %x, i32 1
  %v1 = insertelement <16 x i8> %v0, i8 %x1, i32 1
  %x2 = extractelement <8 x i8> %x, i32 2
  %v2 = insertelement <16 x i8> %v1, i8 %x2, i32 2
  %x3 = extractelement <8 x i8> %x, i32 3
  %v3 = insertelement <16 x i8> %v2, i8 %x3, i32 3
  %x4 = extractelement <8 x i8> %x, i32 4
  %v4 = insertelement <16 x i8> %v3, i8 %x4, i32 4
  %x5 = extractelement <8 x i8> %x, i32 5
  %v5 = insertelement <16 x i8> %v4, i8 %x5, i32 5
  %x6 = extractelement <8 x i8> %x, i32 6
  %v6 = insertelement <16 x i8> %v5, i8 %x6, i32 6
  %x7 = extractelement <8 x i8> %x, i32 7
  %v7 = insertelement <16 x i8> %v6, i8 %x7, i32 7
  %cat = shufflevector <16 x i8> %v7, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %cat
}
1166
; Bytes 0-7 of the 128-bit %x fill result lanes 0-7 and the eight bytes of the
; 64-bit %y fill lanes 8-15, all via scalar extract/insert pairs. The expected
; selection is one ins copying d[0] into d[1].
define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <16 x i8> %x, i32 0
  %v0 = insertelement <16 x i8> undef, i8 %x0, i32 0
  %x1 = extractelement <16 x i8> %x, i32 1
  %v1 = insertelement <16 x i8> %v0, i8 %x1, i32 1
  %x2 = extractelement <16 x i8> %x, i32 2
  %v2 = insertelement <16 x i8> %v1, i8 %x2, i32 2
  %x3 = extractelement <16 x i8> %x, i32 3
  %v3 = insertelement <16 x i8> %v2, i8 %x3, i32 3
  %x4 = extractelement <16 x i8> %x, i32 4
  %v4 = insertelement <16 x i8> %v3, i8 %x4, i32 4
  %x5 = extractelement <16 x i8> %x, i32 5
  %v5 = insertelement <16 x i8> %v4, i8 %x5, i32 5
  %x6 = extractelement <16 x i8> %x, i32 6
  %v6 = insertelement <16 x i8> %v5, i8 %x6, i32 6
  %x7 = extractelement <16 x i8> %x, i32 7
  %v7 = insertelement <16 x i8> %v6, i8 %x7, i32 7
  %y0 = extractelement <8 x i8> %y, i32 0
  %v8 = insertelement <16 x i8> %v7, i8 %y0, i32 8
  %y1 = extractelement <8 x i8> %y, i32 1
  %v9 = insertelement <16 x i8> %v8, i8 %y1, i32 9
  %y2 = extractelement <8 x i8> %y, i32 2
  %v10 = insertelement <16 x i8> %v9, i8 %y2, i32 10
  %y3 = extractelement <8 x i8> %y, i32 3
  %v11 = insertelement <16 x i8> %v10, i8 %y3, i32 11
  %y4 = extractelement <8 x i8> %y, i32 4
  %v12 = insertelement <16 x i8> %v11, i8 %y4, i32 12
  %y5 = extractelement <8 x i8> %y, i32 5
  %v13 = insertelement <16 x i8> %v12, i8 %y5, i32 13
  %y6 = extractelement <8 x i8> %y, i32 6
  %v14 = insertelement <16 x i8> %v13, i8 %y6, i32 14
  %y7 = extractelement <8 x i8> %y, i32 7
  %v15 = insertelement <16 x i8> %v14, i8 %y7, i32 15
  ret <16 x i8> %v15
}
1205
; Two 64-bit byte vectors are joined lane by lane: %x fills result lanes 0-7
; and %y fills lanes 8-15. The expected selection is one ins copying d[0]
; into d[1].
define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <8 x i8> %x, i32 0
  %v0 = insertelement <16 x i8> undef, i8 %x0, i32 0
  %x1 = extractelement <8 x i8> %x, i32 1
  %v1 = insertelement <16 x i8> %v0, i8 %x1, i32 1
  %x2 = extractelement <8 x i8> %x, i32 2
  %v2 = insertelement <16 x i8> %v1, i8 %x2, i32 2
  %x3 = extractelement <8 x i8> %x, i32 3
  %v3 = insertelement <16 x i8> %v2, i8 %x3, i32 3
  %x4 = extractelement <8 x i8> %x, i32 4
  %v4 = insertelement <16 x i8> %v3, i8 %x4, i32 4
  %x5 = extractelement <8 x i8> %x, i32 5
  %v5 = insertelement <16 x i8> %v4, i8 %x5, i32 5
  %x6 = extractelement <8 x i8> %x, i32 6
  %v6 = insertelement <16 x i8> %v5, i8 %x6, i32 6
  %x7 = extractelement <8 x i8> %x, i32 7
  %v7 = insertelement <16 x i8> %v6, i8 %x7, i32 7
  %y0 = extractelement <8 x i8> %y, i32 0
  %v8 = insertelement <16 x i8> %v7, i8 %y0, i32 8
  %y1 = extractelement <8 x i8> %y, i32 1
  %v9 = insertelement <16 x i8> %v8, i8 %y1, i32 9
  %y2 = extractelement <8 x i8> %y, i32 2
  %v10 = insertelement <16 x i8> %v9, i8 %y2, i32 10
  %y3 = extractelement <8 x i8> %y, i32 3
  %v11 = insertelement <16 x i8> %v10, i8 %y3, i32 11
  %y4 = extractelement <8 x i8> %y, i32 4
  %v12 = insertelement <16 x i8> %v11, i8 %y4, i32 12
  %y5 = extractelement <8 x i8> %y, i32 5
  %v13 = insertelement <16 x i8> %v12, i8 %y5, i32 13
  %y6 = extractelement <8 x i8> %y, i32 6
  %v14 = insertelement <16 x i8> %v13, i8 %y6, i32 14
  %y7 = extractelement <8 x i8> %y, i32 7
  %v15 = insertelement <16 x i8> %v14, i8 %y7, i32 15
  ret <16 x i8> %v15
}
1244
; The shuffle mask takes halfwords 0-3 of %x followed by halfwords 0-3 of %y;
; the expected selection is one ins copying d[0] into d[1].
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %cat = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %cat
}
1252
; The four halfwords of %x are copied one by one into the low half of an
; <8 x i16>, then a shuffle appends halfwords 0-3 of %y. The expected
; selection is one ins copying d[0] into d[1].
define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <4 x i16> %x, i32 0
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %x1 = extractelement <4 x i16> %x, i32 1
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %x2 = extractelement <4 x i16> %x, i32 2
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %x3 = extractelement <4 x i16> %x, i32 3
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %cat = shufflevector <8 x i16> %v3, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %cat
}
1268
; Halfwords 0-3 of the 128-bit %x fill result lanes 0-3 and the four halfwords
; of the 64-bit %y fill lanes 4-7. The expected selection is one ins copying
; d[0] into d[1].
define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <8 x i16> %x, i32 0
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %x1 = extractelement <8 x i16> %x, i32 1
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %x2 = extractelement <8 x i16> %x, i32 2
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %x3 = extractelement <8 x i16> %x, i32 3
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %y0 = extractelement <4 x i16> %y, i32 0
  %v4 = insertelement <8 x i16> %v3, i16 %y0, i32 4
  %y1 = extractelement <4 x i16> %y, i32 1
  %v5 = insertelement <8 x i16> %v4, i16 %y1, i32 5
  %y2 = extractelement <4 x i16> %y, i32 2
  %v6 = insertelement <8 x i16> %v5, i16 %y2, i32 6
  %y3 = extractelement <4 x i16> %y, i32 3
  %v7 = insertelement <8 x i16> %v6, i16 %y3, i32 7
  ret <8 x i16> %v7
}
1291
; Two 64-bit halfword vectors are joined lane by lane: %x fills result lanes
; 0-3 and %y fills lanes 4-7. The expected selection is one ins copying d[0]
; into d[1].
define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <4 x i16> %x, i32 0
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %x1 = extractelement <4 x i16> %x, i32 1
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %x2 = extractelement <4 x i16> %x, i32 2
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %x3 = extractelement <4 x i16> %x, i32 3
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %y0 = extractelement <4 x i16> %y, i32 0
  %v4 = insertelement <8 x i16> %v3, i16 %y0, i32 4
  %y1 = extractelement <4 x i16> %y, i32 1
  %v5 = insertelement <8 x i16> %v4, i16 %y1, i32 5
  %y2 = extractelement <4 x i16> %y, i32 2
  %v6 = insertelement <8 x i16> %v5, i16 %y2, i32 6
  %y3 = extractelement <4 x i16> %y, i32 3
  %v7 = insertelement <8 x i16> %v6, i16 %y3, i32 7
  ret <8 x i16> %v7
}
1314
; The shuffle mask takes words 0-1 of %x followed by words 0-1 of %y; the
; expected selection is one ins copying d[0] into d[1].
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %cat = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %cat
}
1322
; Both words of %x are copied into the low half of a <4 x i32>, then a shuffle
; appends words 0-1 of %y. The expected selection is one ins copying d[0]
; into d[1].
define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <2 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %x1 = extractelement <2 x i32> %x, i32 1
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %cat = shufflevector <4 x i32> %v1, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %cat
}
1334
; Words 0-1 of the 128-bit %x fill result lanes 0-1 and both words of the
; 64-bit %y fill lanes 2-3. The expected selection is one ins copying d[0]
; into d[1].
define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <4 x i32> %x, i32 0
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %y0 = extractelement <2 x i32> %y, i32 0
  %v2 = insertelement <4 x i32> %v1, i32 %y0, i32 2
  %y1 = extractelement <2 x i32> %y, i32 1
  %v3 = insertelement <4 x i32> %v2, i32 %y1, i32 3
  ret <4 x i32> %v3
}
1349
; A widening shuffle joins two <2 x i32> operands into one <4 x i32>; the
; expected selection is one ins copying d[0] into d[1].
define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %cat = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %cat
}
1357
; The shuffle mask picks lane 0 of %x and lane 0 of %y; the expected selection
; here is a zip1 on .2d operands.
define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %cat = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %cat
}
1365
; The single lane of %x is placed in lane 0 of a <2 x i64>, then a shuffle
; pairs it with lane 0 of %y. The expected selection here is a zip1 on .2d
; operands.
define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
  %x0 = extractelement <1 x i64> %x, i32 0
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %cat = shufflevector <2 x i64> %v0, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %cat
}
1375
; Lane 0 of the 128-bit %x goes to result lane 0 and the single lane of %y
; goes to result lane 1. The expected selection is one ins copying d[0]
; into d[1].
define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <2 x i64> %x, i32 0
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %y0 = extractelement <1 x i64> %y, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %y0, i32 1
  ret <2 x i64> %v1
}
1386
; Two <1 x i64> operands are joined: %x supplies result lane 0 and %y supplies
; result lane 1. The expected selection is one ins copying d[0] into d[1].
define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
  %x0 = extractelement <1 x i64> %x, i32 0
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %y0 = extractelement <1 x i64> %y, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %y0, i32 1
  ret <2 x i64> %v1
}
1397
1398
; Broadcasting a constant-zero <1 x i16> to four lanes is expected to become
; a zeroing movi on a d-register.
define <4 x i16> @concat_vector_v4i16_const() {
; CHECK-LABEL: concat_vector_v4i16_const:
; CHECK: movi {{d[0-9]+}}, #0
  %splat = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
1405
; Broadcasting the constant <i16 1> to four lanes is expected to become a
; movi of #1 with the .4h arrangement.
define <4 x i16> @concat_vector_v4i16_const_one() {
; CHECK-LABEL: concat_vector_v4i16_const_one:
; CHECK: movi {{v[0-9]+}}.4h, #1
  %splat = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
1412
; Broadcasting a constant-zero <1 x i32> to four lanes is expected to become
; a zeroing movi with the .2d arrangement.
define <4 x i32> @concat_vector_v4i32_const() {
; CHECK-LABEL: concat_vector_v4i32_const:
; CHECK: movi {{v[0-9]+}}.2d, #0
  %splat = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1419
; Broadcasting a constant-zero <1 x i8> to eight lanes is expected to become
; a zeroing movi on a d-register.
define <8 x i8> @concat_vector_v8i8_const() {
; CHECK-LABEL: concat_vector_v8i8_const:
; CHECK: movi {{d[0-9]+}}, #0
  %splat = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}
1426
; Broadcasting a constant-zero <1 x i16> to eight lanes is expected to become
; a zeroing movi with the .2d arrangement.
define <8 x i16> @concat_vector_v8i16_const() {
; CHECK-LABEL: concat_vector_v8i16_const:
; CHECK: movi {{v[0-9]+}}.2d, #0
  %splat = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
1433
; Broadcasting the constant <i16 1> to eight lanes is expected to become a
; movi of #1 with the .8h arrangement.
define <8 x i16> @concat_vector_v8i16_const_one() {
; CHECK-LABEL: concat_vector_v8i16_const_one:
; CHECK: movi {{v[0-9]+}}.8h, #1
  %splat = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
1440
; Broadcasting a constant-zero <1 x i8> to sixteen lanes is expected to become
; a zeroing movi with the .2d arrangement.
define <16 x i8> @concat_vector_v16i8_const() {
; CHECK-LABEL: concat_vector_v16i8_const:
; CHECK: movi {{v[0-9]+}}.2d, #0
  %splat = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
1447
; An all-zero shuffle mask broadcasts the single lane of %a to four halfword
; lanes; the expected selection is a dup from h[0].
define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v4i16:
; CHECK: dup v0.4h, v0.h[0]
  %splat = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
1454
; An all-zero shuffle mask broadcasts the single lane of %a to four word
; lanes; the expected selection is a dup from s[0].
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-LABEL: concat_vector_v4i32:
; CHECK: dup v0.4s, v0.s[0]
  %splat = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
1461
; An all-zero shuffle mask broadcasts the single lane of %a to eight byte
; lanes; the expected selection is a dup from b[0].
define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v8i8:
; CHECK: dup v0.8b, v0.b[0]
  %splat = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}
1468
; An all-zero shuffle mask broadcasts the single lane of %a to eight halfword
; lanes; the expected selection is a dup from h[0].
define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v8i16:
; CHECK: dup v0.8h, v0.h[0]
  %splat = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
1475
; An all-zero shuffle mask broadcasts the single lane of %a to sixteen byte
; lanes; the expected selection is a dup from b[0].
define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v16i8:
; CHECK: dup v0.16b, v0.b[0]
  %splat = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
1482