; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
3
; Insert a scalar i8 into lane 15 of a <16 x i8> vector; expects a
; GPR-to-lane move into .b[15].
define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK: mov {{v[0-9]+}}.b[15], {{w[0-9]+}}
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}
10
; Insert a scalar i16 into lane 6 of an <8 x i16> vector; expects a
; GPR-to-lane move into .h[6].
define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK: mov {{v[0-9]+}}.h[6], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}
17
; Insert a scalar i32 into lane 2 of a <4 x i32> vector; expects a
; GPR-to-lane move into .s[2].
define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK: mov {{v[0-9]+}}.s[2], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}
24
; Insert a scalar i64 into lane 1 of a <2 x i64> vector; expects a move
; from a 64-bit GPR (x register) into .d[1].
define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK: mov {{v[0-9]+}}.d[1], {{x[0-9]+}}
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}
31
; Insert a scalar i8 into lane 5 of a 64-bit <8 x i8> vector; expects a
; GPR-to-lane move into .b[5].
define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK: mov {{v[0-9]+}}.b[5], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}
38
; Insert a scalar i16 into lane 3 of a 64-bit <4 x i16> vector; expects a
; GPR-to-lane move into .h[3].
define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK: mov {{v[0-9]+}}.h[3], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}
45
; Insert a scalar i32 into lane 1 of a 64-bit <2 x i32> vector; expects a
; GPR-to-lane move into .s[1].
define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK: mov {{v[0-9]+}}.s[1], {{w[0-9]+}}
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}
52
; Copy lane 2 of one <16 x i8> vector into lane 15 of another; expects a
; single lane-to-lane move (.b[2] -> .b[15]).
define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}
60
; Copy lane 2 of one <8 x i16> vector into lane 7 of another; expects a
; single lane-to-lane move (.h[2] -> .h[7]).
define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}
68
; Copy lane 2 of one <4 x i32> vector into lane 1 of another; expects a
; single lane-to-lane move (.s[2] -> .s[1]).
define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}
76
; Copy lane 0 of one <2 x i64> vector into lane 1 of another; expects a
; single lane-to-lane move (.d[0] -> .d[1]).
define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}
84
; Floating-point variant: copy lane 2 of one <4 x float> vector into lane 1
; of another; expects a lane-to-lane move (.s[2] -> .s[1]).
define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}
92
; Floating-point variant: copy lane 0 of one <2 x double> vector into lane 1
; of another; expects a lane-to-lane move (.d[0] -> .d[1]).
define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}
100
; Mixed widths: copy lane 2 of a 64-bit <8 x i8> source into lane 15 of a
; 128-bit <16 x i8> destination; expects a lane-to-lane move.
define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}
108
; Mixed widths: copy lane 2 of a 64-bit <4 x i16> source into lane 7 of a
; 128-bit <8 x i16> destination; expects a lane-to-lane move.
define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}
116
; Mixed widths: copy lane 1 of a 64-bit <2 x i32> source into lane 1 of a
; 128-bit <4 x i32> destination; expects a lane-to-lane move.
define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}
124
; Mixed widths: copy the only lane of a <1 x i64> source into lane 1 of a
; <2 x i64> destination; expects a lane-to-lane move (.d[0] -> .d[1]).
define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}
132
; Mixed widths, floating point: copy lane 1 of a <2 x float> source into
; lane 1 of a <4 x float> destination; expects a lane-to-lane move.
define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}
140
; Copy the only lane of a <1 x double> source into lane 1 of a <2 x double>
; destination. Unlike the integer variant, the expected output here is a
; zip1 on .2d operands rather than a lane move.
define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}
148
; Mixed widths: copy lane 2 of a 128-bit <16 x i8> source into lane 7 of a
; 64-bit <8 x i8> destination; expects a lane-to-lane move.
define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}
156
; Mixed widths: copy lane 2 of a 128-bit <8 x i16> source into lane 3 of a
; 64-bit <4 x i16> destination; expects a lane-to-lane move.
define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}
164
; Mixed widths: copy lane 2 of a 128-bit <4 x i32> source into lane 1 of a
; 64-bit <2 x i32> destination; expects a lane-to-lane move.
define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}
172
; Mixed widths: copy lane 0 of a <2 x i64> source into the only lane of a
; <1 x i64> destination; expects a lane-to-lane move (.d[0] -> .d[0]).
define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}
180
; Mixed widths, floating point: copy lane 2 of a <4 x float> source into
; lane 1 of a <2 x float> destination; expects a lane-to-lane move.
define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}
188
; Copy lane 1 of a <2 x double> source into the only lane of a <1 x double>
; destination. The expected output is a dup of .d[1] into a .2d register
; rather than a lane move.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}
196
; Copy lane 2 of one 64-bit <8 x i8> vector into lane 4 of another; expects
; a lane-to-lane move (.b[2] -> .b[4]).
define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: mov {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}
204
; Copy lane 2 of one 64-bit <4 x i16> vector into lane 3 of another; expects
; a lane-to-lane move (.h[2] -> .h[3]).
define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}
212
; Copy lane 0 of one 64-bit <2 x i32> vector into lane 1 of another; expects
; a lane-to-lane move (.s[0] -> .s[1]).
define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}
220
; Copy the only lane of one <1 x i64> vector into the only lane of another;
; expects a lane-to-lane move (.d[0] -> .d[0]).
define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}
228
; Floating-point variant: copy lane 0 of one <2 x float> vector into lane 1
; of another; expects a lane-to-lane move (.s[0] -> .s[1]).
define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}
236
; Replace the only lane of a <1 x double> with the only lane of another
; <1 x double>. This is a negative test: no vector-register lane move
; should appear in the output for this function.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: mov {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}
244
; Extract lane 8 of a <16 x i8> vector and zero-extend it to i32; expects an
; unsigned lane-to-GPR move (umov) from .b[8].
define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}
252
; Extract lane 2 of an <8 x i16> vector and zero-extend it to i32; expects
; an unsigned lane-to-GPR move (umov) from .h[2].
define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}
260
; Extract lane 2 of a <4 x i32> vector as an i32 (no extension needed);
; expects a lane-to-GPR move from .s[2] into a w register.
define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}
267
; Extract lane 1 of a <2 x i64> vector as an i64; expects a lane-to-GPR move
; from .d[1] into an x register.
define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}
274
; Extract lane 7 of a 64-bit <8 x i8> vector and zero-extend it to i32.
; The pattern names the full `umov` mnemonic (as umovw16b does) instead of
; the bare `mov` substring, so it cannot be satisfied by a different move.
define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}
282
; Extract lane 2 of a 64-bit <4 x i16> vector and zero-extend it to i32.
; The pattern names the full `umov` mnemonic (as umovw8h does) instead of
; the bare `mov` substring, so it cannot be satisfied by a different move.
define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}
290
; Extract lane 1 of a 64-bit <2 x i32> vector as an i32; expects a
; lane-to-GPR move from .s[1] into a w register.
define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}
297
; Extract the only lane of a <1 x i64> vector as an i64; expects a plain
; fmov from a d register to an x register rather than a lane move.
define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}
304
; Extract lane 8 of a <16 x i8> vector, sign-extend it to i32 and add the
; result to itself; expects a signed lane-to-GPR move (smov) from .b[8]
; into a w register.
define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}
313
; Extract lane 2 of an <8 x i16> vector, sign-extend it to i32 and add the
; result to itself; expects a signed lane-to-GPR move (smov) from .h[2]
; into a w register.
define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}
322
; Extract lane 8 of a <16 x i8> vector and sign-extend it to i64; expects an
; smov from .b[8] into an x register.
define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}
330
; Extract lane 2 of an <8 x i16> vector and sign-extend it to i64; expects
; an smov from .h[2] into an x register.
define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}
338
; Extract lane 2 of a <4 x i32> vector and sign-extend it to i64; expects an
; smov from .s[2] into an x register.
define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}
346
; Extract lane 4 of a 64-bit <8 x i8> vector, sign-extend it to i32 and add
; the result to itself; expects an smov from .b[4] into a w register.
define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}
355
; Extract lane 2 of a 64-bit <4 x i16> vector, sign-extend it to i32 and add
; the result to itself; expects an smov from .h[2] into a w register.
define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}
364
; Extract lane 6 of a 64-bit <8 x i8> vector and sign-extend it to i32.
; Despite the "x" in the name the function returns i32, and the pattern
; accepts either an x or a w destination register.
define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}
372
; Extract lane 2 of a 64-bit <4 x i16> vector and sign-extend it to i32.
; Despite the "x" in the name the function returns i32, and the pattern
; accepts either an x or a w destination register.
define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}
380
; Extract lane 1 of a 64-bit <2 x i32> vector and sign-extend it to i64;
; expects an smov from .s[1] into an x register.
define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}
388
; Lane copy expressed as a shuffle: the mask keeps every lane of %v1 except
; lane 5, which takes index 11 (lane 3 of %v2). Expects one lane-to-lane
; move (.b[3] -> .b[5]).
define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: mov  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}
395
; 128-bit lane copy expressed as a shuffle: the mask keeps every lane of %v1
; except lane 14, which takes index 22 (lane 6 of %v2). Expects one
; lane-to-lane move (.b[6] -> .b[14]).
define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: mov  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}
402
; Swapped-operand lane copy: the mask takes lanes 0-6 from %v2 (indices
; 8-14) and lane 7 from lane 0 of %v1. Expects one lane-to-lane move
; (.b[0] -> .b[7]).
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}
409
; Swapped-operand 128-bit lane copy: the mask takes lane 0 from lane 15 of
; %v1 and lanes 1-15 from %v2 (indices 17-31). Expects one lane-to-lane move
; (.b[15] -> .b[0]).
define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: mov {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}
416
; Build an <8 x i8> splat by inserting the same scalar into all eight lanes;
; expects the chain to collapse into a single dup from a w register.
define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}
430
; Build a <4 x i16> splat by inserting the same scalar into all four lanes;
; expects a single dup from a w register.
define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}
440
; Build a <2 x i32> splat by inserting the same scalar into both lanes;
; expects a single dup from a w register.
define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}
448
; Build a <1 x i64> from a scalar i64; with a single lane the expected
; output is a plain fmov from an x register to a d register.
define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}
455
; Build a <16 x i8> splat by inserting the same scalar into all sixteen
; lanes; expects the chain to collapse into a single dup from a w register.
define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}
477
; Build an <8 x i16> splat by inserting the same scalar into all eight
; lanes; expects a single dup from a w register.
define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}
491
; Build a <4 x i32> splat by inserting the same scalar into all four lanes;
; expects a single dup from a w register.
define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}
501
; Build a <2 x i64> splat by inserting the same scalar into both lanes;
; expects a single dup from an x register.
define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}
509
; Splat lane 5 of an <8 x i8> vector across an <8 x i8> result via an
; all-5 shuffle mask; expects a lane dup (.b[5] -> .8b).
define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}
516
; Splat lane 2 of a <4 x i16> vector across a <4 x i16> result; expects a
; lane dup (.h[2] -> .4h).
define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}
523
; Splat lane 1 of a <2 x i32> vector across a <2 x i32> result; expects a
; lane dup (.s[1] -> .2s).
define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}
530
; Splat lane 5 of a 64-bit <8 x i8> vector across a 128-bit <16 x i8>
; result. The pattern now names the `dup` mnemonic (it previously matched
; the operands only), in line with the other lane-dup tests in this file.
define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}
537
; Splat lane 2 of a 64-bit <4 x i16> vector across a 128-bit <8 x i16>
; result. The pattern now names the `dup` mnemonic (it previously matched
; the operands only), in line with the other lane-dup tests in this file.
define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}
544
; Splat lane 1 of a 64-bit <2 x i32> vector across a 128-bit <4 x i32>
; result. The pattern now names the `dup` mnemonic (it previously matched
; the operands only), in line with the other lane-dup tests in this file.
define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}
551
; Splat the only lane of a <1 x i64> vector across a <2 x i64> result
; (zeroinitializer mask selects lane 0 twice). The pattern now names the
; `dup` mnemonic; it previously matched the operands only.
define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}
558
; Splat lane 5 of a 128-bit <16 x i8> vector across a 64-bit <8 x i8>
; result; expects a lane dup (.b[5] -> .8b).
define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}
565
; Splat lane 2 of a 128-bit <8 x i16> vector across a 64-bit <4 x i16>
; result; expects a lane dup (.h[2] -> .4h).
define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}
572
; Splat lane 1 of a 128-bit <4 x i32> vector across a 64-bit <2 x i32>
; result; expects a lane dup (.s[1] -> .2s).
define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}
579
; Splat lane 5 of a <16 x i8> vector across a <16 x i8> result; expects a
; lane dup (.b[5] -> .16b).
define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}
586
; Splat lane 2 of an <8 x i16> vector across an <8 x i16> result. The
; pattern now names the `dup` mnemonic (it previously matched the operands
; only), in line with the neighbouring laneq tests.
define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}
593
; Splat lane 1 of a <4 x i32> vector across a <4 x i32> result; expects a
; lane dup (.s[1] -> .4s).
define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}
600
; Splat lane 0 of a <2 x i64> vector across a <2 x i64> result
; (zeroinitializer mask); expects a lane dup (.d[0] -> .2d).
define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}
607
; Bitcast a 64-bit <8 x i8> vector to a scalar i64; expects an fmov from a
; d register to an x register.
define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
   %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
   ret i64 %res
}
614
; Bitcast a 64-bit <4 x i16> vector to a scalar i64; expects an fmov from a
; d register to an x register.
define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
   %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
   ret i64 %res
}
621
; Bitcast a 64-bit <2 x i32> vector to a scalar i64; expects an fmov from a
; d register to an x register.
define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
   %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
   ret i64 %res
}
628
; Bitcast a 64-bit <2 x float> vector to a scalar i64; expects an fmov from
; a d register to an x register.
define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
   %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
   ret i64 %res
}
635
; Bitcast a <1 x i64> vector to a scalar i64; expects an fmov from a d
; register to an x register.
define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
   %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
   ret i64 %res
}
642
; Bitcast a <1 x double> vector to a scalar i64; expects an fmov from a d
; register to an x register.
define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
   %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
   ret i64 %res
}
649
; Bitcast a scalar i64 to a 64-bit <8 x i8> vector; expects an fmov from an
; x register to a d register.
define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
   %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
   ret <8 x i8> %res
}
656
; Bitcast a scalar i64 to a 64-bit <4 x i16> vector; expects an fmov from an
; x register to a d register.
define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
   %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
   ret <4 x i16> %res
}
663
; Bitcast a scalar i64 to a 64-bit <2 x i32> vector; expects an fmov from an
; x register to a d register.
define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
   %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
   ret <2 x i32> %res
}
670
; Bitcast a scalar i64 to a 64-bit <2 x float> vector; expects an fmov from
; an x register to a d register.
define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
   %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
   ret <2 x float> %res
}
677
; Bitcast a scalar i64 to a <1 x i64> vector; expects an fmov from an x
; register to a d register.
define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
   %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
   ret <1 x i64> %res
}
684
; Bitcast a scalar i64 to a <1 x double> vector; expects an fmov from an x
; register to a d register.
define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
   %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
   ret <1 x double> %res
}
691
; Negate an <8 x i8> vector, reinterpret the bits as <1 x double>, then
; convert to <1 x i64> with fptosi. Expects the vector neg to be followed
; directly by fcvtzs, i.e. the bitcast itself emits no instruction.
define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}
701
; Negate a <4 x i16> vector, reinterpret the bits as <1 x double>, then
; convert to <1 x i64> with fptosi. Expects the vector neg to be followed
; directly by fcvtzs, i.e. the bitcast itself emits no instruction.
define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}
711
; Negate a <2 x i32> vector, reinterpret the bits as <1 x double>, then
; convert to <1 x i64> with fptosi. Expects the vector neg to be followed
; directly by fcvtzs, i.e. the bitcast itself emits no instruction.
define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}
721
; Negate a <1 x i64> vector, reinterpret the bits as <1 x double>, then
; convert back to <1 x i64> with fptosi. Expects a scalar d-register neg
; followed directly by fcvtzs.
define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}
731
; Negate a <2 x float> vector (fsub from -0.0), reinterpret the bits as
; <1 x double>, then convert to <1 x i64> with fptosi. Expects a vector
; fneg followed directly by fcvtzs.
define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}
741
; Convert <1 x i64> to <1 x double> with sitofp, reinterpret the bits as
; <8 x i8>, then negate. Expects scvtf to be followed directly by the
; vector neg, i.e. the bitcast itself emits no instruction.
define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}
751
; Convert <1 x i64> to <1 x double> with sitofp, reinterpret the bits as
; <4 x i16>, then negate. Expects scvtf to be followed directly by the
; vector neg, i.e. the bitcast itself emits no instruction.
define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}
761
; Convert <1 x i64> to <1 x double> with sitofp, reinterpret the bits as
; <2 x i32>, then negate. Expects scvtf to be followed directly by the
; vector neg, i.e. the bitcast itself emits no instruction.
define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}
771
; Convert <1 x i64> to <1 x double> with sitofp, reinterpret the bits as
; <1 x i64>, then negate. Expects scvtf to be followed directly by a scalar
; d-register neg.
define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}
781
; Convert <1 x i64> to <1 x double> with sitofp, reinterpret the bits as
; <2 x float>, then negate (fsub from -0.0). Expects scvtf to be followed
; directly by a vector fneg.
define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}
791
792; Test insert element into an undef vector
; Insert an i8 into lane 0 of an undef <8 x i8>; the other lanes are
; unconstrained, so the expected output is a plain fmov from a w register to
; an s register.
define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}
799
; Insert an i8 into lane 0 of an undef <16 x i8>; expects a plain fmov from
; a w register to an s register.
define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}
806
; Insert an i16 into lane 0 of an undef <4 x i16>; expects a plain fmov from
; a w register to an s register.
define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}
813
; Insert an i16 into lane 0 of an undef <8 x i16>; expects a plain fmov from
; a w register to an s register.
define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}
820
; Insert an i32 into lane 0 of an undef <2 x i32>; expects a plain fmov from
; a w register to an s register.
define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}
827
; Insert an i32 into lane 0 of an undef <4 x i32>; expects a plain fmov from
; a w register to an s register.
define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}
834
; Insert an i64 into lane 0 of an undef <2 x i64>; expects a plain fmov from
; an x register to a d register.
define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}
841
; Extract the only lane of a <1 x i8> argument and insert it into every lane
; of an <8 x i8>; expects a single lane dup. The pattern pins the concrete
; registers (v0 as both source and destination).
define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}
856
; Extract the only lane of a <1 x i16> argument and insert it into every
; lane of an <8 x i16>; expects a single lane dup. The pattern pins the
; concrete registers (v0 as both source and destination).
define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}
871
; Splat built by hand: the single element of a <1 x i32> is extracted once and
; inserted into each of the 4 lanes of a <4 x i32>.
; Expected lowering: a single dup of word lane 0 across v0.4s.
872define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
873; CHECK-LABEL: testDUP.v1i32:
874; CHECK: dup v0.4s, v0.s[0]
875  %b = extractelement <1 x i32> %a, i32 0
876  %c = insertelement <4 x i32> undef, i32 %b, i32 0
877  %d = insertelement <4 x i32> %c, i32 %b, i32 1
878  %e = insertelement <4 x i32> %d, i32 %b, i32 2
879  %f = insertelement <4 x i32> %e, i32 %b, i32 3
880  ret <4 x i32> %f
881}
882
; Rebuilds the low half of a <16 x i8> lane by lane: lanes 0..7 of %x are each
; extracted and re-inserted at the same index of a <8 x i8>.
; The only expectation after the label is that a ret is found.
; NOTE(review): presumably the intent is that no per-lane moves are emitted;
; the pattern as written does not enforce that - confirm before relying on it.
883define <8 x i8> @getl(<16 x i8> %x) #0 {
884; CHECK-LABEL: getl:
885; CHECK: ret
886  %vecext = extractelement <16 x i8> %x, i32 0
887  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
888  %vecext1 = extractelement <16 x i8> %x, i32 1
889  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
890  %vecext3 = extractelement <16 x i8> %x, i32 2
891  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
892  %vecext5 = extractelement <16 x i8> %x, i32 3
893  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
894  %vecext7 = extractelement <16 x i8> %x, i32 4
895  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
896  %vecext9 = extractelement <16 x i8> %x, i32 5
897  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
898  %vecext11 = extractelement <16 x i8> %x, i32 6
899  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
900  %vecext13 = extractelement <16 x i8> %x, i32 7
901  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
902  ret <8 x i8> %vecinit14
903}
904
; Builds a <4 x i16> from a <8 x i16>: lane 0 comes from a variable-index
; extract (%idx), lanes 1..3 are copied from constant lanes 1..3 of %x.
; Expected sequence: the whole vector is stored to memory (str q0), the index
; is masked to 0..7 and merged into the address with bfi (scaled by 2 bytes),
; the selected halfword is loaded into a fresh register, and the three
; constant lanes are then moved in from v0. The load and the lane moves may
; appear in any relative order.
905; CHECK-LABEL: test_extracts_inserts_varidx_extract:
906; CHECK: str q0
907; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
908; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
909; CHECK-DAG: ldr h[[R:[0-9]+]], {{\[}}[[PTR]]{{\]}}
910; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
911; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
912; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
913define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
914  %tmp = extractelement <8 x i16> %x, i32 %idx
915  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
916  %tmp3 = extractelement <8 x i16> %x, i32 1
917  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
918  %tmp5 = extractelement <8 x i16> %x, i32 2
919  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
920  %tmp7 = extractelement <8 x i16> %x, i32 3
921  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
922  ret <4 x i16> %tmp8
923}
924
; Builds a <4 x i16> from a <8 x i16>: lane 0 of %x is inserted at a variable
; index (%idx) into an undef vector, then lanes 1..3 are overwritten with
; constant lanes 1..3 of %x.
; Expected sequence: the index is masked to 0..3 and merged into an address
; with bfi (scaled by 2 bytes), h0 is stored through that address, the 64-bit
; vector is reloaded, and the three constant lanes are moved in from v0.
; The address register is captured in a pattern variable rather than spelled
; as a fixed register, so the test does not depend on which scratch register
; the allocator happens to pick.
925; CHECK-LABEL: test_extracts_inserts_varidx_insert:
926; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
927; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #2
928; CHECK: str h0, {{\[}}[[PTR]]{{\]}}
929; CHECK-DAG: ldr d[[R:[0-9]+]]
930; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
931; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
932; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
933define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
934  %tmp = extractelement <8 x i16> %x, i32 0
935  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
936  %tmp3 = extractelement <8 x i16> %x, i32 1
937  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
938  %tmp5 = extractelement <8 x i16> %x, i32 2
939  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
940  %tmp7 = extractelement <8 x i16> %x, i32 3
941  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
942  ret <4 x i16> %tmp8
943}
944
; Extracts i32 lane 1 of a <2 x i32>, truncates it to i16, and splats the
; result into all 4 lanes of a <4 x i16>.
; Expected lowering: a dup straight from halfword lane 2, i.e. source lane 1
; scaled by the 32/16 element-size ratio, with no separate extract/truncate.
945define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
946; CHECK-LABEL: test_dup_v2i32_v4i16:
947; CHECK: dup v0.4h, v0.h[2]
948entry:
949  %x = extractelement <2 x i32> %a, i32 1
950  %vget_lane = trunc i32 %x to i16
951  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
952  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
953  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
954  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
955  ret <4 x i16> %vecinit3.i
956}
957
; Extracts i32 lane 3 of a <4 x i32>, truncates it to i16, and splats the
; result into all 8 lanes of a <8 x i16>.
; Expected lowering: a dup from halfword lane 6 (source lane 3 scaled by the
; 32/16 element-size ratio).
958define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
959; CHECK-LABEL: test_dup_v4i32_v8i16:
960; CHECK: dup v0.8h, v0.h[6]
961entry:
962  %x = extractelement <4 x i32> %a, i32 3
963  %vget_lane = trunc i32 %x to i16
964  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
965  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
966  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
967  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
968  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
969  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
970  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
971  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
972  ret <8 x i16> %vecinit7.i
973}
974
; Extracts the only lane of a <1 x i64>, truncates it to i16, and splats the
; result into all 4 lanes of a <4 x i16>.
; Expected lowering: a dup from halfword lane 0.
975define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
976; CHECK-LABEL: test_dup_v1i64_v4i16:
977; CHECK: dup v0.4h, v0.h[0]
978entry:
979  %x = extractelement <1 x i64> %a, i32 0
980  %vget_lane = trunc i64 %x to i16
981  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
982  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
983  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
984  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
985  ret <4 x i16> %vecinit3.i
986}
987
; Extracts the only lane of a <1 x i64>, truncates it to i32, and splats the
; result into both lanes of a <2 x i32>.
; Expected lowering: a dup from word lane 0.
988define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
989; CHECK-LABEL: test_dup_v1i64_v2i32:
990; CHECK: dup v0.2s, v0.s[0]
991entry:
992  %x = extractelement <1 x i64> %a, i32 0
993  %vget_lane = trunc i64 %x to i32
994  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
995  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
996  ret <2 x i32> %vecinit1.i
997}
998
; Extracts i64 lane 1 of a <2 x i64>, truncates it to i16, and splats the
; result into all 8 lanes of a <8 x i16>.
; Expected lowering: a dup from halfword lane 4 (source lane 1 scaled by the
; 64/16 element-size ratio).
999define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
1000; CHECK-LABEL: test_dup_v2i64_v8i16:
1001; CHECK: dup v0.8h, v0.h[4]
1002entry:
1003  %x = extractelement <2 x i64> %a, i32 1
1004  %vget_lane = trunc i64 %x to i16
1005  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1006  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1007  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1008  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1009  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1010  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1011  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1012  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1013  ret <8 x i16> %vecinit7.i
1014}
1015
; Extracts i64 lane 1 of a <2 x i64>, truncates it to i32, and splats the
; result into all 4 lanes of a <4 x i32>.
; Expected lowering: a dup from word lane 2 (source lane 1 scaled by the
; 64/32 element-size ratio).
1016define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1017; CHECK-LABEL: test_dup_v2i64_v4i32:
1018; CHECK: dup v0.4s, v0.s[2]
1019entry:
1020  %x = extractelement <2 x i64> %a, i32 1
1021  %vget_lane = trunc i64 %x to i32
1022  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1023  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1024  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1025  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1026  ret <4 x i32> %vecinit3.i
1027}
1028
; Extracts i32 lane 1 of a 128-bit <4 x i32>, truncates it to i16, and splats
; the result into all 4 lanes of a 64-bit <4 x i16>.
; Expected lowering: a dup from halfword lane 2 (source lane 1 scaled by the
; 32/16 element-size ratio); the source lane lies within the low 64 bits.
1029define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1030; CHECK-LABEL: test_dup_v4i32_v4i16:
1031; CHECK: dup v0.4h, v0.h[2]
1032entry:
1033  %x = extractelement <4 x i32> %a, i32 1
1034  %vget_lane = trunc i32 %x to i16
1035  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1036  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1037  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1038  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1039  ret <4 x i16> %vecinit3.i
1040}
1041
; Extracts i64 lane 0 of a 128-bit <2 x i64>, truncates it to i16, and splats
; the result into all 4 lanes of a 64-bit <4 x i16>.
; Expected lowering: a dup from halfword lane 0.
1042define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1043; CHECK-LABEL: test_dup_v2i64_v4i16:
1044; CHECK: dup v0.4h, v0.h[0]
1045entry:
1046  %x = extractelement <2 x i64> %a, i32 0
1047  %vget_lane = trunc i64 %x to i16
1048  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1049  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1050  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1051  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1052  ret <4 x i16> %vecinit3.i
1053}
1054
; Extracts i64 lane 0 of a 128-bit <2 x i64>, truncates it to i32, and splats
; the result into both lanes of a 64-bit <2 x i32>.
; Expected lowering: a dup from word lane 0.
1055define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1056; CHECK-LABEL: test_dup_v2i64_v2i32:
1057; CHECK: dup v0.2s, v0.s[0]
1058entry:
1059  %x = extractelement <2 x i64> %a, i32 0
1060  %vget_lane = trunc i64 %x to i32
1061  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1062  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1063  ret <2 x i32> %vecinit1.i
1064}
1065
1066
; Reduces a <2 x float> with the fmaxv intrinsic, round-trips the scalar
; through a <1 x float> (insert then extract of lane 0), and places it in
; lane 0 of an otherwise-undef <2 x float>.
; Expected lowering: an fmaxp writing an s register, with ret on the very next
; line - i.e. the round-trip and the final insert cost no extra instruction.
1067define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1068; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1069; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1070; CHECK-NEXT: ret
1071entry:
1072  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1073  %1 = insertelement <1 x float> undef, float %0, i32 0
1074  %2 = extractelement <1 x float> %1, i32 0
1075  %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1076  ret <2 x float> %vecinit1.i
1077}
1078
; Reduces a <2 x float> with the fmaxv intrinsic, round-trips the scalar
; through a <1 x float> (insert then extract of lane 0), and places it in
; lane 0 of an otherwise-undef <4 x float>.
; Expected lowering: an fmaxp writing an s register, with ret on the very next
; line - i.e. widening to a 128-bit result costs no extra instruction.
1079define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1080; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1081; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1082; CHECK-NEXT: ret
1083entry:
1084  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1085  %1 = insertelement <1 x float> undef, float %0, i32 0
1086  %2 = extractelement <1 x float> %1, i32 0
1087  %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1088  ret <4 x float> %vecinit1.i
1089}
1090
1091declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
1092
; Takes lane 0 of a <2 x i32> and inserts it into lane 1 of an undef vector,
; leaving lane 0 of the result undefined.
; Expected lowering: a dup of word lane 0 across a .2s register (with lane 0
; undefined, a splat satisfies the result).
1093define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1094; CHECK-LABEL: test_concat_undef_v1i32:
1095; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
1096entry:
1097  %0 = extractelement <2 x i32> %a, i32 0
1098  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1099  ret <2 x i32> %vecinit1.i
1100}
1101
1102declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
1103
; Applies the scalar sqabs intrinsic to an i32 argument and inserts the result
; into lane 0 of an undef <2 x i32>; lane 1 stays undefined.
; Expected lowering: sqabs on s registers with ret on the very next line, so
; no instruction is spent moving the result into the vector.
1104define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1105; CHECK-LABEL: test_concat_v1i32_undef:
1106; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1107; CHECK-NEXT: ret
1108entry:
1109  %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1110  %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1111  ret <2 x i32> %vecinit.i432
1112}
1113
; Takes lane 0 of a <2 x i32> and inserts it into both lanes of the result.
; Expected lowering: a dup of word lane 0 across a .2s register.
1114define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1115; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1116; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
1117entry:
1118  %0 = extractelement <2 x i32> %a, i32 0
1119  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1120  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1121  ret <2 x i32> %vecinit1.i
1122}
1123
; Applies the scalar sqabs intrinsic to two i32 arguments, places each result
; in lane 0 of its own undef <2 x i32>, and combines them with a shuffle whose
; mask <0, 2> selects lane 0 of the first and lane 0 of the second vector.
; Expected lowering: two sqabs on s registers, then a mov into word lane 1
; sourced from a w register.
1124define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1125; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1126; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1127; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1128; CHECK: mov {{v[0-9]+}}.s[1], w{{[0-9]+}}
1129entry:
1130  %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1131  %d = insertelement <2 x i32> undef, i32 %c, i32 0
1132  %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1133  %f = insertelement <2 x i32> undef, i32 %e, i32 0
1134  %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
1135  ret <2 x i32> %h
1136}
1137
; Concatenates the low halves of two <16 x i8> values with one shuffle: mask
; indices 0..7 pick the low 8 bytes of %x, indices 16..23 the low 8 bytes of %y.
; Expected lowering: a single mov of d[0] of one register into d[1] of another.
1138define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1139; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1140; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1141entry:
1142  %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1143  ret <16 x i8> %vecinit30
1144}
1145
; Concatenates a 64-bit <8 x i8> with the low half of a <16 x i8>: the 8 bytes
; of %x are copied lane by lane into lanes 0..7 of a 128-bit vector, then a
; shuffle keeps those lanes and appends lanes 0..7 of %y (mask indices 16..23).
; Expected lowering: a single mov of d[0] into d[1]; the per-lane copies are
; expected to fold away.
1146define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1147; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1148; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1149entry:
1150  %vecext = extractelement <8 x i8> %x, i32 0
1151  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1152  %vecext1 = extractelement <8 x i8> %x, i32 1
1153  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1154  %vecext3 = extractelement <8 x i8> %x, i32 2
1155  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1156  %vecext5 = extractelement <8 x i8> %x, i32 3
1157  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1158  %vecext7 = extractelement <8 x i8> %x, i32 4
1159  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1160  %vecext9 = extractelement <8 x i8> %x, i32 5
1161  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1162  %vecext11 = extractelement <8 x i8> %x, i32 6
1163  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1164  %vecext13 = extractelement <8 x i8> %x, i32 7
1165  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1166  %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1167  ret <16 x i8> %vecinit30
1168}
1169
; Concatenates the low half of a <16 x i8> with a 64-bit <8 x i8>, written
; entirely as per-lane copies: lanes 0..7 of %x go to result lanes 0..7, and
; lanes 0..7 of %y go to result lanes 8..15.
; Expected lowering: a single mov of d[0] into d[1] instead of 16 byte moves.
1170define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1171; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1172; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1173entry:
1174  %vecext = extractelement <16 x i8> %x, i32 0
1175  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1176  %vecext1 = extractelement <16 x i8> %x, i32 1
1177  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1178  %vecext3 = extractelement <16 x i8> %x, i32 2
1179  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1180  %vecext5 = extractelement <16 x i8> %x, i32 3
1181  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1182  %vecext7 = extractelement <16 x i8> %x, i32 4
1183  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1184  %vecext9 = extractelement <16 x i8> %x, i32 5
1185  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1186  %vecext11 = extractelement <16 x i8> %x, i32 6
1187  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1188  %vecext13 = extractelement <16 x i8> %x, i32 7
1189  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1190  %vecext15 = extractelement <8 x i8> %y, i32 0
1191  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1192  %vecext17 = extractelement <8 x i8> %y, i32 1
1193  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1194  %vecext19 = extractelement <8 x i8> %y, i32 2
1195  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1196  %vecext21 = extractelement <8 x i8> %y, i32 3
1197  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1198  %vecext23 = extractelement <8 x i8> %y, i32 4
1199  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1200  %vecext25 = extractelement <8 x i8> %y, i32 5
1201  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1202  %vecext27 = extractelement <8 x i8> %y, i32 6
1203  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1204  %vecext29 = extractelement <8 x i8> %y, i32 7
1205  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1206  ret <16 x i8> %vecinit30
1207}
1208
; Concatenates two 64-bit <8 x i8> values into a <16 x i8>, written entirely
; as per-lane copies: %x fills result lanes 0..7 and %y fills lanes 8..15.
; Expected lowering: a single mov of d[0] into d[1] instead of 16 byte moves.
1209define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1210; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1211; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1212entry:
1213  %vecext = extractelement <8 x i8> %x, i32 0
1214  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1215  %vecext1 = extractelement <8 x i8> %x, i32 1
1216  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1217  %vecext3 = extractelement <8 x i8> %x, i32 2
1218  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1219  %vecext5 = extractelement <8 x i8> %x, i32 3
1220  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1221  %vecext7 = extractelement <8 x i8> %x, i32 4
1222  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1223  %vecext9 = extractelement <8 x i8> %x, i32 5
1224  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1225  %vecext11 = extractelement <8 x i8> %x, i32 6
1226  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1227  %vecext13 = extractelement <8 x i8> %x, i32 7
1228  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1229  %vecext15 = extractelement <8 x i8> %y, i32 0
1230  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1231  %vecext17 = extractelement <8 x i8> %y, i32 1
1232  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1233  %vecext19 = extractelement <8 x i8> %y, i32 2
1234  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1235  %vecext21 = extractelement <8 x i8> %y, i32 3
1236  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1237  %vecext23 = extractelement <8 x i8> %y, i32 4
1238  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1239  %vecext25 = extractelement <8 x i8> %y, i32 5
1240  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1241  %vecext27 = extractelement <8 x i8> %y, i32 6
1242  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1243  %vecext29 = extractelement <8 x i8> %y, i32 7
1244  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1245  ret <16 x i8> %vecinit30
1246}
1247
; Concatenates the low halves of two <8 x i16> values with one shuffle: mask
; indices 0..3 pick the low 4 halfwords of %x, indices 8..11 those of %y.
; Expected lowering: a single mov of d[0] into d[1].
1248define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1249; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1250; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1251entry:
1252  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1253  ret <8 x i16> %vecinit14
1254}
1255
; Concatenates a 64-bit <4 x i16> with the low half of a <8 x i16>: the 4
; halfwords of %x are copied lane by lane into lanes 0..3 of a 128-bit vector,
; then a shuffle keeps them and appends lanes 0..3 of %y (mask indices 8..11).
; Expected lowering: a single mov of d[0] into d[1].
1256define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1257; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1258; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1259entry:
1260  %vecext = extractelement <4 x i16> %x, i32 0
1261  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1262  %vecext1 = extractelement <4 x i16> %x, i32 1
1263  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1264  %vecext3 = extractelement <4 x i16> %x, i32 2
1265  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1266  %vecext5 = extractelement <4 x i16> %x, i32 3
1267  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1268  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1269  ret <8 x i16> %vecinit14
1270}
1271
; Concatenates the low half of a <8 x i16> with a 64-bit <4 x i16>, written as
; per-lane copies: lanes 0..3 of %x go to result lanes 0..3, and lanes 0..3 of
; %y go to result lanes 4..7.
; Expected lowering: a single mov of d[0] into d[1].
1272define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1273; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1274; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1275entry:
1276  %vecext = extractelement <8 x i16> %x, i32 0
1277  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1278  %vecext1 = extractelement <8 x i16> %x, i32 1
1279  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1280  %vecext3 = extractelement <8 x i16> %x, i32 2
1281  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1282  %vecext5 = extractelement <8 x i16> %x, i32 3
1283  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1284  %vecext7 = extractelement <4 x i16> %y, i32 0
1285  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1286  %vecext9 = extractelement <4 x i16> %y, i32 1
1287  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1288  %vecext11 = extractelement <4 x i16> %y, i32 2
1289  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1290  %vecext13 = extractelement <4 x i16> %y, i32 3
1291  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1292  ret <8 x i16> %vecinit14
1293}
1294
; Concatenates two 64-bit <4 x i16> values into a <8 x i16>, written as
; per-lane copies: %x fills result lanes 0..3 and %y fills lanes 4..7.
; Expected lowering: a single mov of d[0] into d[1].
1295define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1296; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1297; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1298entry:
1299  %vecext = extractelement <4 x i16> %x, i32 0
1300  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1301  %vecext1 = extractelement <4 x i16> %x, i32 1
1302  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1303  %vecext3 = extractelement <4 x i16> %x, i32 2
1304  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1305  %vecext5 = extractelement <4 x i16> %x, i32 3
1306  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1307  %vecext7 = extractelement <4 x i16> %y, i32 0
1308  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1309  %vecext9 = extractelement <4 x i16> %y, i32 1
1310  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1311  %vecext11 = extractelement <4 x i16> %y, i32 2
1312  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1313  %vecext13 = extractelement <4 x i16> %y, i32 3
1314  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1315  ret <8 x i16> %vecinit14
1316}
1317
; Concatenates the low halves of two <4 x i32> values with one shuffle: mask
; indices 0..1 pick the low 2 words of %x, indices 4..5 those of %y.
; Expected lowering: a single mov of d[0] into d[1].
1318define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1319; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1320; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1321entry:
1322  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1323  ret <4 x i32> %vecinit6
1324}
1325
; Concatenates a 64-bit <2 x i32> with the low half of a <4 x i32>: both words
; of %x are copied lane by lane into lanes 0..1 of a 128-bit vector, then a
; shuffle keeps them and appends lanes 0..1 of %y (mask indices 4..5).
; Expected lowering: a single mov of d[0] into d[1].
1326define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1327; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1328; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1329entry:
1330  %vecext = extractelement <2 x i32> %x, i32 0
1331  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1332  %vecext1 = extractelement <2 x i32> %x, i32 1
1333  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1334  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1335  ret <4 x i32> %vecinit6
1336}
1337
; Concatenates the low half of a <4 x i32> with a 64-bit <2 x i32>, written as
; per-lane copies: lanes 0..1 of %x go to result lanes 0..1, and lanes 0..1 of
; %y go to result lanes 2..3.
; Expected lowering: a single mov of d[0] into d[1].
1338define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1339; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1340; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1341entry:
1342  %vecext = extractelement <4 x i32> %x, i32 0
1343  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1344  %vecext1 = extractelement <4 x i32> %x, i32 1
1345  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1346  %vecext3 = extractelement <2 x i32> %y, i32 0
1347  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1348  %vecext5 = extractelement <2 x i32> %y, i32 1
1349  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1350  ret <4 x i32> %vecinit6
1351}
1352
; Concatenates two 64-bit <2 x i32> values into a <4 x i32> with a widening
; shuffle whose mask <0, 1, 2, 3> takes every lane of %x followed by every
; lane of %y.
; Expected lowering: a single mov of d[0] into d[1].
1353define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1354; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1355; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1356entry:
1357  %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1358  ret <4 x i32> %vecinit6
1359}
1360
; Combines lane 0 of two <2 x i64> values with one shuffle (mask <0, 2>).
; Expected lowering: a zip1 on .2d operands, unlike the narrower-element
; concat tests, which expect a d-lane mov.
1361define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1362; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1363; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1364entry:
1365  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1366  ret <2 x i64> %vecinit2
1367}
1368
; Places the only lane of a <1 x i64> in lane 0 of a 128-bit vector, then
; shuffles it with %y using mask <0, 2> so lane 1 becomes lane 0 of %y.
; Expected lowering: a zip1 on .2d operands.
1369define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1370; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1371; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1372entry:
1373  %vecext = extractelement <1 x i64> %x, i32 0
1374  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1375  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1376  ret <2 x i64> %vecinit2
1377}
1378
; Builds a <2 x i64> from lane 0 of a <2 x i64> (result lane 0) and the only
; lane of a <1 x i64> (result lane 1), using extract/insert pairs.
; Expected lowering: a zip1 on .2d operands.
1379define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1380; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1381; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1382entry:
1383  %vecext = extractelement <2 x i64> %x, i32 0
1384  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1385  %vecext1 = extractelement <1 x i64> %y, i32 0
1386  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1387  ret <2 x i64> %vecinit2
1388}
1389
; Builds a <2 x i64> from two <1 x i64> values: %x supplies lane 0 and %y
; supplies lane 1, using extract/insert pairs.
; Expected lowering: a mov of d[0] into d[1] (not the zip1 expected when one
; of the inputs is already a 128-bit vector).
1390define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1391; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1392; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1393entry:
1394  %vecext = extractelement <1 x i64> %x, i32 0
1395  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1396  %vecext1 = extractelement <1 x i64> %y, i32 0
1397  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1398  ret <2 x i64> %vecinit2
1399}
1400
1401
; Splats a constant-zero <1 x i16> to 4 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #0 into a d register.
1402define <4 x i16> @concat_vector_v4i16_const() {
1403; CHECK-LABEL: concat_vector_v4i16_const:
1404; CHECK: movi {{d[0-9]+}}, #0
1405 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
1406 ret <4 x i16> %r
1407}
1408
; Splats the constant <1 x i16> <1> to 4 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #1 into a .4h register.
1409define <4 x i16> @concat_vector_v4i16_const_one() {
1410; CHECK-LABEL: concat_vector_v4i16_const_one:
1411; CHECK: movi {{v[0-9]+}}.4h, #1
1412 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
1413 ret <4 x i16> %r
1414}
1415
; Splats a constant-zero <1 x i32> to 4 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #0 into a .2d register.
1416define <4 x i32> @concat_vector_v4i32_const() {
1417; CHECK-LABEL: concat_vector_v4i32_const:
1418; CHECK: movi {{v[0-9]+}}.2d, #0
1419 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
1420 ret <4 x i32> %r
1421}
1422
; Splats a constant-zero <1 x i8> to 8 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #0 into a d register.
1423define <8 x i8> @concat_vector_v8i8_const() {
1424; CHECK-LABEL: concat_vector_v8i8_const:
1425; CHECK: movi {{d[0-9]+}}, #0
1426 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
1427 ret <8 x i8> %r
1428}
1429
; Splats a constant-zero <1 x i16> to 8 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #0 into a .2d register.
1430define <8 x i16> @concat_vector_v8i16_const() {
1431; CHECK-LABEL: concat_vector_v8i16_const:
1432; CHECK: movi {{v[0-9]+}}.2d, #0
1433 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
1434 ret <8 x i16> %r
1435}
1436
; Splats the constant <1 x i16> <1> to 8 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #1 into a .8h register.
1437define <8 x i16> @concat_vector_v8i16_const_one() {
1438; CHECK-LABEL: concat_vector_v8i16_const_one:
1439; CHECK: movi {{v[0-9]+}}.8h, #1
1440 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
1441 ret <8 x i16> %r
1442}
1443
; Splats a constant-zero <1 x i8> to 16 lanes with an all-zero shuffle mask.
; Expected lowering: movi of #0 into a .2d register.
1444define <16 x i8> @concat_vector_v16i8_const() {
1445; CHECK-LABEL: concat_vector_v16i8_const:
1446; CHECK: movi {{v[0-9]+}}.2d, #0
1447 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
1448 ret <16 x i8> %r
1449}
1450
; Splats the single element of a <1 x i16> argument to 4 lanes with an
; all-zero shuffle mask.
; Expected lowering: a dup of halfword lane 0 across v0.4h.
1451define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1452; CHECK-LABEL: concat_vector_v4i16:
1453; CHECK: dup v0.4h, v0.h[0]
1454 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
1455 ret <4 x i16> %r
1456}
1457
; Splats the single element of a <1 x i32> argument to 4 lanes with an
; all-zero shuffle mask.
; Expected lowering: a dup of word lane 0 across v0.4s.
1458define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1459; CHECK-LABEL: concat_vector_v4i32:
1460; CHECK: dup v0.4s, v0.s[0]
1461 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
1462 ret <4 x i32> %r
1463}
1464
; Splats the single element of a <1 x i8> argument to 8 lanes with an
; all-zero shuffle mask.
; Expected lowering: a dup of byte lane 0 across v0.8b.
1465define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1466; CHECK-LABEL: concat_vector_v8i8:
1467; CHECK: dup v0.8b, v0.b[0]
1468 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
1469 ret <8 x i8> %r
1470}
1471
; Splats the single element of a <1 x i16> argument to 8 lanes with an
; all-zero shuffle mask.
; Expected lowering: a dup of halfword lane 0 across v0.8h.
1472define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1473; CHECK-LABEL: concat_vector_v8i16:
1474; CHECK: dup v0.8h, v0.h[0]
1475 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1476 ret <8 x i16> %r
1477}
1478
; Splats the single element of a <1 x i8> argument to 16 lanes with an
; all-zero shuffle mask.
; Expected lowering: a dup of byte lane 0 across v0.16b.
1479define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1480; CHECK-LABEL: concat_vector_v16i8:
1481; CHECK: dup v0.16b, v0.b[0]
1482 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
1483 ret <16 x i8> %r
1484}
1485