1; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2
3
; Insert a general-purpose i8 into lane 15 of a <16 x i8>; a single INS from a
; W register is expected.
4define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5; CHECK-LABEL: ins16bw:
6; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
7  %vec = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
8  ret <16 x i8> %vec
9}
10
; Insert a general-purpose i16 into lane 6 of a <8 x i16>; a single INS from a
; W register is expected.
11define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
12; CHECK-LABEL: ins8hw:
13; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
14  %vec = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
15  ret <8 x i16> %vec
16}
17
; Insert a general-purpose i32 into lane 2 of a <4 x i32>; a single INS from a
; W register is expected.
18define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
19; CHECK-LABEL: ins4sw:
20; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
21  %vec = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
22  ret <4 x i32> %vec
23}
24
; Insert a general-purpose i64 into lane 1 of a <2 x i64>; a single INS from an
; X register is expected.
25define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
26; CHECK-LABEL: ins2dw:
27; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
28  %vec = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
29  ret <2 x i64> %vec
30}
31
; Insert a general-purpose i8 into lane 5 of a 64-bit <8 x i8>; a single INS
; from a W register is expected.
32define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
33; CHECK-LABEL: ins8bw:
34; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
35  %vec = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
36  ret <8 x i8> %vec
37}
38
; Insert a general-purpose i16 into lane 3 of a 64-bit <4 x i16>; a single INS
; from a W register is expected.
39define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
40; CHECK-LABEL: ins4hw:
41; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
42  %vec = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
43  ret <4 x i16> %vec
44}
45
; Insert a general-purpose i32 into lane 1 of a 64-bit <2 x i32>; a single INS
; from a W register is expected.
46define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
47; CHECK-LABEL: ins2sw:
48; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
49  %vec = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
50  ret <2 x i32> %vec
51}
52
; Copy lane 2 of one <16 x i8> into lane 15 of another; an element-to-element
; INS is expected.
53define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
54; CHECK-LABEL: ins16b16:
55; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
56  %elt = extractelement <16 x i8> %tmp1, i32 2
57  %vec = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
58  ret <16 x i8> %vec
59}
60
; Copy lane 2 of one <8 x i16> into lane 7 of another; an element-to-element
; INS is expected.
61define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
62; CHECK-LABEL: ins8h8:
63; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
64  %elt = extractelement <8 x i16> %tmp1, i32 2
65  %vec = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
66  ret <8 x i16> %vec
67}
68
; Copy lane 2 of one <4 x i32> into lane 1 of another; an element-to-element
; INS is expected.
69define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
70; CHECK-LABEL: ins4s4:
71; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
72  %elt = extractelement <4 x i32> %tmp1, i32 2
73  %vec = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
74  ret <4 x i32> %vec
75}
76
; Copy lane 0 of one <2 x i64> into lane 1 of another; an element-to-element
; INS is expected.
77define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
78; CHECK-LABEL: ins2d2:
79; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
80  %elt = extractelement <2 x i64> %tmp1, i32 0
81  %vec = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
82  ret <2 x i64> %vec
83}
84
; Copy lane 2 of one <4 x float> into lane 1 of another; an element-to-element
; INS on .s lanes is expected.
85define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
86; CHECK-LABEL: ins4f4:
87; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
88  %elt = extractelement <4 x float> %tmp1, i32 2
89  %vec = insertelement <4 x float> %tmp2, float %elt, i32 1
90  ret <4 x float> %vec
91}
92
; Copy lane 0 of one <2 x double> into lane 1 of another; an element-to-element
; INS on .d lanes is expected.
93define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
94; CHECK-LABEL: ins2df2:
95; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
96  %elt = extractelement <2 x double> %tmp1, i32 0
97  %vec = insertelement <2 x double> %tmp2, double %elt, i32 1
98  ret <2 x double> %vec
99}
100
; Copy lane 2 of a 64-bit <8 x i8> into lane 15 of a 128-bit <16 x i8>; an
; element-to-element INS is expected.
101define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
102; CHECK-LABEL: ins8b16:
103; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
104  %elt = extractelement <8 x i8> %tmp1, i32 2
105  %vec = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
106  ret <16 x i8> %vec
107}
108
; Copy lane 2 of a 64-bit <4 x i16> into lane 7 of a 128-bit <8 x i16>; an
; element-to-element INS is expected.
109define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
110; CHECK-LABEL: ins4h8:
111; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
112  %elt = extractelement <4 x i16> %tmp1, i32 2
113  %vec = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
114  ret <8 x i16> %vec
115}
116
; Copy lane 1 of a 64-bit <2 x i32> into lane 1 of a 128-bit <4 x i32>; an
; element-to-element INS is expected.
117define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
118; CHECK-LABEL: ins2s4:
119; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
120  %elt = extractelement <2 x i32> %tmp1, i32 1
121  %vec = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
122  ret <4 x i32> %vec
123}
124
; Copy the only lane of a <1 x i64> into lane 1 of a <2 x i64>; an
; element-to-element INS is expected.
125define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
126; CHECK-LABEL: ins1d2:
127; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
128  %elt = extractelement <1 x i64> %tmp1, i32 0
129  %vec = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
130  ret <2 x i64> %vec
131}
132
; Copy lane 1 of a 64-bit <2 x float> into lane 1 of a 128-bit <4 x float>; an
; element-to-element INS is expected.
133define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
134; CHECK-LABEL: ins2f4:
135; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
136  %elt = extractelement <2 x float> %tmp1, i32 1
137  %vec = insertelement <4 x float> %tmp2, float %elt, i32 1
138  ret <4 x float> %vec
139}
140
; Copy the only lane of a <1 x double> into lane 1 of a <2 x double>; an
; element-to-element INS is expected.
141define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
142; CHECK-LABEL: ins1f2:
143; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
144  %elt = extractelement <1 x double> %tmp1, i32 0
145  %vec = insertelement <2 x double> %tmp2, double %elt, i32 1
146  ret <2 x double> %vec
147}
148
; Copy lane 2 of a 128-bit <16 x i8> into lane 7 of a 64-bit <8 x i8>; an
; element-to-element INS is expected.
149define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
150; CHECK-LABEL: ins16b8:
151; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
152  %elt = extractelement <16 x i8> %tmp1, i32 2
153  %vec = insertelement <8 x i8> %tmp2, i8 %elt, i32 7
154  ret <8 x i8> %vec
155}
156
; Copy lane 2 of a 128-bit <8 x i16> into lane 3 of a 64-bit <4 x i16>; an
; element-to-element INS is expected.
157define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
158; CHECK-LABEL: ins8h4:
159; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
160  %elt = extractelement <8 x i16> %tmp1, i32 2
161  %vec = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
162  ret <4 x i16> %vec
163}
164
; Copy lane 2 of a 128-bit <4 x i32> into lane 1 of a 64-bit <2 x i32>; an
; element-to-element INS is expected.
165define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
166; CHECK-LABEL: ins4s2:
167; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
168  %elt = extractelement <4 x i32> %tmp1, i32 2
169  %vec = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
170  ret <2 x i32> %vec
171}
172
; Copy lane 0 of a <2 x i64> into the only lane of a <1 x i64>; an
; element-to-element INS on .d[0] is expected.
173define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
174; CHECK-LABEL: ins2d1:
175; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
176  %elt = extractelement <2 x i64> %tmp1, i32 0
177  %vec = insertelement <1 x i64> %tmp2, i64 %elt, i32 0
178  ret <1 x i64> %vec
179}
180
; Copy lane 2 of a 128-bit <4 x float> into lane 1 of a 64-bit <2 x float>; an
; element-to-element INS is expected.
181define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
182; CHECK-LABEL: ins4f2:
183; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
184  %elt = extractelement <4 x float> %tmp1, i32 2
185  %vec = insertelement <2 x float> %tmp2, float %elt, i32 1
186  ret <2 x float> %vec
187}
188
; Move lane 1 of a <2 x double> into the only lane of a <1 x double>; a scalar
; MOV from the vector lane into a D register is expected.
189define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
190; CHECK-LABEL: ins2f1:
191; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
192  %elt = extractelement <2 x double> %tmp1, i32 1
193  %vec = insertelement <1 x double> %tmp2, double %elt, i32 0
194  ret <1 x double> %vec
195}
196
; Copy lane 2 of one 64-bit <8 x i8> into lane 4 of another; an
; element-to-element INS is expected.
197define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
198; CHECK-LABEL: ins8b8:
199; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
200  %elt = extractelement <8 x i8> %tmp1, i32 2
201  %vec = insertelement <8 x i8> %tmp2, i8 %elt, i32 4
202  ret <8 x i8> %vec
203}
204
; Copy lane 2 of one 64-bit <4 x i16> into lane 3 of another; an
; element-to-element INS is expected.
205define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
206; CHECK-LABEL: ins4h4:
207; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
208  %elt = extractelement <4 x i16> %tmp1, i32 2
209  %vec = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
210  ret <4 x i16> %vec
211}
212
; Copy lane 0 of one 64-bit <2 x i32> into lane 1 of another; an
; element-to-element INS is expected.
213define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
214; CHECK-LABEL: ins2s2:
215; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
216  %elt = extractelement <2 x i32> %tmp1, i32 0
217  %vec = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
218  ret <2 x i32> %vec
219}
220
; Copy the only lane of one <1 x i64> into the only lane of another; an INS on
; .d[0] is expected.
221define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
222; CHECK-LABEL: ins1d1:
223; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
224  %elt = extractelement <1 x i64> %tmp1, i32 0
225  %vec = insertelement <1 x i64> %tmp2, i64 %elt, i32 0
226  ret <1 x i64> %vec
227}
228
; Copy lane 0 of one 64-bit <2 x float> into lane 1 of another; an
; element-to-element INS is expected.
229define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
230; CHECK-LABEL: ins2f2:
231; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
232  %elt = extractelement <2 x float> %tmp1, i32 0
233  %vec = insertelement <2 x float> %tmp2, float %elt, i32 1
234  ret <2 x float> %vec
235}
236
; Replace the only lane of a <1 x double> with the only lane of another; the
; test requires that no vector INS is emitted for this.
237define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
238; CHECK-LABEL: ins1df1:
239; CHECK-NOT: ins {{v[0-9]+}}
240  %elt = extractelement <1 x double> %tmp1, i32 0
241  %vec = insertelement <1 x double> %tmp2, double %elt, i32 0
242  ret <1 x double> %vec
243}
244
; Zero-extend lane 8 of a <16 x i8> to i32; a single UMOV into a W register is
; expected.
245define i32 @umovw16b(<16 x i8> %tmp1) {
246; CHECK-LABEL: umovw16b:
247; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
248  %elt = extractelement <16 x i8> %tmp1, i32 8
249  %ext = zext i8 %elt to i32
250  ret i32 %ext
251}
252
; Zero-extend lane 2 of a <8 x i16> to i32; a single UMOV into a W register is
; expected.
253define i32 @umovw8h(<8 x i16> %tmp1) {
254; CHECK-LABEL: umovw8h:
255; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
256  %elt = extractelement <8 x i16> %tmp1, i32 2
257  %ext = zext i16 %elt to i32
258  ret i32 %ext
259}
260
; Extract lane 2 of a <4 x i32> into a general-purpose register; the pattern
; expects the MOV spelling with a W destination.
261define i32 @umovw4s(<4 x i32> %tmp1) {
262; CHECK-LABEL: umovw4s:
263; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
264  %elt = extractelement <4 x i32> %tmp1, i32 2
265  ret i32 %elt
266}
267
; Extract lane 1 of a <2 x i64> into a general-purpose register; the pattern
; expects the MOV spelling with an X destination.
268define i64 @umovx2d(<2 x i64> %tmp1) {
269; CHECK-LABEL: umovx2d:
270; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
271  %elt = extractelement <2 x i64> %tmp1, i32 1
272  ret i64 %elt
273}
274
; Zero-extend lane 7 of a 64-bit <8 x i8> to i32; a single UMOV into a W
; register is expected.
; UMOV only has a MOV alias for 32-bit and 64-bit lanes, so a byte extract is
; printed as "umov". The pattern spells the full mnemonic (as umovw16b does)
; instead of relying on "mov" matching as a substring of "umov".
275define i32 @umovw8b(<8 x i8> %tmp1) {
276; CHECK-LABEL: umovw8b:
277; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
278  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
279  %tmp4 = zext i8 %tmp3 to i32
280  ret i32 %tmp4
281}
282
; Zero-extend lane 2 of a 64-bit <4 x i16> to i32; a single UMOV into a W
; register is expected.
; UMOV only has a MOV alias for 32-bit and 64-bit lanes, so a halfword extract
; is printed as "umov". The pattern spells the full mnemonic (as umovw8h does)
; instead of relying on "mov" matching as a substring of "umov".
283define i32 @umovw4h(<4 x i16> %tmp1) {
284; CHECK-LABEL: umovw4h:
285; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
286  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
287  %tmp4 = zext i16 %tmp3 to i32
288  ret i32 %tmp4
289}
290
; Extract lane 1 of a 64-bit <2 x i32> into a general-purpose register; the
; pattern expects the MOV spelling with a W destination.
291define i32 @umovw2s(<2 x i32> %tmp1) {
292; CHECK-LABEL: umovw2s:
293; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
294  %elt = extractelement <2 x i32> %tmp1, i32 1
295  ret i32 %elt
296}
297
; Extract the only lane of a <1 x i64>; a plain FMOV from a D register to an X
; register is expected.
298define i64 @umovx1d(<1 x i64> %tmp1) {
299; CHECK-LABEL: umovx1d:
300; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
301  %elt = extractelement <1 x i64> %tmp1, i32 0
302  ret i64 %elt
303}
304
; Sign-extend lane 8 of a <16 x i8> to i32 and add the result to itself; an
; SMOV into a W register is expected.
305define i32 @smovw16b(<16 x i8> %tmp1) {
306; CHECK-LABEL: smovw16b:
307; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
308  %elt = extractelement <16 x i8> %tmp1, i32 8
309  %ext = sext i8 %elt to i32
310  %sum = add i32 %ext, %ext
311  ret i32 %sum
312}
313
; Sign-extend lane 2 of a <8 x i16> to i32 and add the result to itself; an
; SMOV into a W register is expected.
314define i32 @smovw8h(<8 x i16> %tmp1) {
315; CHECK-LABEL: smovw8h:
316; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
317  %elt = extractelement <8 x i16> %tmp1, i32 2
318  %ext = sext i16 %elt to i32
319  %sum = add i32 %ext, %ext
320  ret i32 %sum
321}
322
; Sign-extend lane 8 of a <16 x i8> to i32 and add the result to itself; an
; SMOV is expected, with either an X or a W destination accepted.
323define i32 @smovx16b(<16 x i8> %tmp1) {
324; CHECK-LABEL: smovx16b:
325; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
326  %elt = extractelement <16 x i8> %tmp1, i32 8
327  %ext = sext i8 %elt to i32
328  %sum = add i32 %ext, %ext
329  ret i32 %sum
330}
331
; Sign-extend lane 2 of a <8 x i16> to i32; an SMOV is expected, with either an
; X or a W destination accepted.
332define i32 @smovx8h(<8 x i16> %tmp1) {
333; CHECK-LABEL: smovx8h:
334; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
335  %elt = extractelement <8 x i16> %tmp1, i32 2
336  %ext = sext i16 %elt to i32
337  ret i32 %ext
338}
339
; Sign-extend lane 2 of a <4 x i32> to i64; an SMOV into an X register is
; expected.
340define i64 @smovx4s(<4 x i32> %tmp1) {
341; CHECK-LABEL: smovx4s:
342; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
343  %elt = extractelement <4 x i32> %tmp1, i32 2
344  %ext = sext i32 %elt to i64
345  ret i64 %ext
346}
347
; Sign-extend lane 4 of a 64-bit <8 x i8> to i32 and add the result to itself;
; an SMOV into a W register is expected.
348define i32 @smovw8b(<8 x i8> %tmp1) {
349; CHECK-LABEL: smovw8b:
350; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
351  %elt = extractelement <8 x i8> %tmp1, i32 4
352  %ext = sext i8 %elt to i32
353  %sum = add i32 %ext, %ext
354  ret i32 %sum
355}
356
; Sign-extend lane 2 of a 64-bit <4 x i16> to i32 and add the result to itself;
; an SMOV into a W register is expected.
357define i32 @smovw4h(<4 x i16> %tmp1) {
358; CHECK-LABEL: smovw4h:
359; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
360  %elt = extractelement <4 x i16> %tmp1, i32 2
361  %ext = sext i16 %elt to i32
362  %sum = add i32 %ext, %ext
363  ret i32 %sum
364}
365
; Sign-extend lane 6 of a 64-bit <8 x i8> to i32; an SMOV is expected, with
; either an X or a W destination accepted.
366define i32 @smovx8b(<8 x i8> %tmp1) {
367; CHECK-LABEL: smovx8b:
368; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
369  %elt = extractelement <8 x i8> %tmp1, i32 6
370  %ext = sext i8 %elt to i32
371  ret i32 %ext
372}
373
; Sign-extend lane 2 of a 64-bit <4 x i16> to i32; an SMOV is expected, with
; either an X or a W destination accepted.
374define i32 @smovx4h(<4 x i16> %tmp1) {
375; CHECK-LABEL: smovx4h:
376; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
377  %elt = extractelement <4 x i16> %tmp1, i32 2
378  %ext = sext i16 %elt to i32
379  ret i32 %ext
380}
381
; Sign-extend lane 1 of a 64-bit <2 x i32> to i64; an SMOV into an X register
; is expected.
382define i64 @smovx2s(<2 x i32> %tmp1) {
383; CHECK-LABEL: smovx2s:
384; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
385  %elt = extractelement <2 x i32> %tmp1, i32 1
386  %ext = sext i32 %elt to i64
387  ret i64 %ext
388}
389
; Shuffle that keeps %v1 except for lane 5, which takes lane 3 of %v2 (mask
; index 11); a single element-to-element INS is expected.
390define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
391; CHECK-LABEL: test_vcopy_lane_s8:
392; CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
393  %copied = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
394  ret <8 x i8> %copied
395}
396
; Shuffle that keeps %v1 except for lane 14, which takes lane 6 of %v2 (mask
; index 22); a single element-to-element INS is expected.
397define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
398; CHECK-LABEL: test_vcopyq_laneq_s8:
399; CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
400  %copied = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
401  ret <16 x i8> %copied
402}
403
; Shuffle that keeps %v2 (mask indices 8-14) except for lane 7, which takes
; lane 0 of %v1; a single element-to-element INS is expected.
404define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
405; CHECK-LABEL: test_vcopy_lane_swap_s8:
406; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
407  %copied = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
408  ret <8 x i8> %copied
409}
410
; Shuffle that keeps %v2 (mask indices 17-31) except for lane 0, which takes
; lane 15 of %v1; a single element-to-element INS is expected.
411define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
412; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
413; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
414  %copied = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
415  ret <16 x i8> %copied
416}
417
; Build a <8 x i8> by inserting the same scalar into every lane; a single DUP
; from a W register to .8b is expected.
418define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
419; CHECK-LABEL: test_vdup_n_u8:
420; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
421  %lane0 = insertelement <8 x i8> undef, i8 %v1, i32 0
422  %lane1 = insertelement <8 x i8> %lane0, i8 %v1, i32 1
423  %lane2 = insertelement <8 x i8> %lane1, i8 %v1, i32 2
424  %lane3 = insertelement <8 x i8> %lane2, i8 %v1, i32 3
425  %lane4 = insertelement <8 x i8> %lane3, i8 %v1, i32 4
426  %lane5 = insertelement <8 x i8> %lane4, i8 %v1, i32 5
427  %lane6 = insertelement <8 x i8> %lane5, i8 %v1, i32 6
428  %lane7 = insertelement <8 x i8> %lane6, i8 %v1, i32 7
429  ret <8 x i8> %lane7
430}
431
; Build a <4 x i16> by inserting the same scalar into every lane; a single DUP
; from a W register to .4h is expected.
432define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
433; CHECK-LABEL: test_vdup_n_u16:
434; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
435  %lane0 = insertelement <4 x i16> undef, i16 %v1, i32 0
436  %lane1 = insertelement <4 x i16> %lane0, i16 %v1, i32 1
437  %lane2 = insertelement <4 x i16> %lane1, i16 %v1, i32 2
438  %lane3 = insertelement <4 x i16> %lane2, i16 %v1, i32 3
439  ret <4 x i16> %lane3
440}
441
; Build a <2 x i32> by inserting the same scalar into both lanes; a single DUP
; from a W register to .2s is expected.
442define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
443; CHECK-LABEL: test_vdup_n_u32:
444; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
445  %lane0 = insertelement <2 x i32> undef, i32 %v1, i32 0
446  %lane1 = insertelement <2 x i32> %lane0, i32 %v1, i32 1
447  ret <2 x i32> %lane1
448}
449
; Build a <1 x i64> from a scalar; with only one lane, a plain FMOV from an X
; register to a D register is expected.
450define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
451; CHECK-LABEL: test_vdup_n_u64:
452; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
453  %lane0 = insertelement <1 x i64> undef, i64 %v1, i32 0
454  ret <1 x i64> %lane0
455}
456
; Build a <16 x i8> by inserting the same scalar into every lane; a single DUP
; from a W register to .16b is expected.
457define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
458; CHECK-LABEL: test_vdupq_n_u8:
459; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
460  %lane0 = insertelement <16 x i8> undef, i8 %v1, i32 0
461  %lane1 = insertelement <16 x i8> %lane0, i8 %v1, i32 1
462  %lane2 = insertelement <16 x i8> %lane1, i8 %v1, i32 2
463  %lane3 = insertelement <16 x i8> %lane2, i8 %v1, i32 3
464  %lane4 = insertelement <16 x i8> %lane3, i8 %v1, i32 4
465  %lane5 = insertelement <16 x i8> %lane4, i8 %v1, i32 5
466  %lane6 = insertelement <16 x i8> %lane5, i8 %v1, i32 6
467  %lane7 = insertelement <16 x i8> %lane6, i8 %v1, i32 7
468  %lane8 = insertelement <16 x i8> %lane7, i8 %v1, i32 8
469  %lane9 = insertelement <16 x i8> %lane8, i8 %v1, i32 9
470  %lane10 = insertelement <16 x i8> %lane9, i8 %v1, i32 10
471  %lane11 = insertelement <16 x i8> %lane10, i8 %v1, i32 11
472  %lane12 = insertelement <16 x i8> %lane11, i8 %v1, i32 12
473  %lane13 = insertelement <16 x i8> %lane12, i8 %v1, i32 13
474  %lane14 = insertelement <16 x i8> %lane13, i8 %v1, i32 14
475  %lane15 = insertelement <16 x i8> %lane14, i8 %v1, i32 15
476  ret <16 x i8> %lane15
477}
478
; Build a <8 x i16> by inserting the same scalar into every lane; a single DUP
; from a W register to .8h is expected.
479define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
480; CHECK-LABEL: test_vdupq_n_u16:
481; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
482  %lane0 = insertelement <8 x i16> undef, i16 %v1, i32 0
483  %lane1 = insertelement <8 x i16> %lane0, i16 %v1, i32 1
484  %lane2 = insertelement <8 x i16> %lane1, i16 %v1, i32 2
485  %lane3 = insertelement <8 x i16> %lane2, i16 %v1, i32 3
486  %lane4 = insertelement <8 x i16> %lane3, i16 %v1, i32 4
487  %lane5 = insertelement <8 x i16> %lane4, i16 %v1, i32 5
488  %lane6 = insertelement <8 x i16> %lane5, i16 %v1, i32 6
489  %lane7 = insertelement <8 x i16> %lane6, i16 %v1, i32 7
490  ret <8 x i16> %lane7
491}
492
; Build a <4 x i32> by inserting the same scalar into every lane; a single DUP
; from a W register to .4s is expected.
493define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
494; CHECK-LABEL: test_vdupq_n_u32:
495; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
496  %lane0 = insertelement <4 x i32> undef, i32 %v1, i32 0
497  %lane1 = insertelement <4 x i32> %lane0, i32 %v1, i32 1
498  %lane2 = insertelement <4 x i32> %lane1, i32 %v1, i32 2
499  %lane3 = insertelement <4 x i32> %lane2, i32 %v1, i32 3
500  ret <4 x i32> %lane3
501}
502
; Build a <2 x i64> by inserting the same scalar into both lanes; a single DUP
; from an X register to .2d is expected.
503define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
504; CHECK-LABEL: test_vdupq_n_u64:
505; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
506  %lane0 = insertelement <2 x i64> undef, i64 %v1, i32 0
507  %lane1 = insertelement <2 x i64> %lane0, i64 %v1, i32 1
508  ret <2 x i64> %lane1
509}
510
; Splat lane 5 of a <8 x i8> across a <8 x i8>; a single lane-indexed DUP to
; .8b is expected.
511define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
512; CHECK-LABEL: test_vdup_lane_s8:
513; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
514  %splat = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
515  ret <8 x i8> %splat
516}
517
; Splat lane 2 of a <4 x i16> across a <4 x i16>; a single lane-indexed DUP to
; .4h is expected.
518define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
519; CHECK-LABEL: test_vdup_lane_s16:
520; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
521  %splat = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
522  ret <4 x i16> %splat
523}
524
; Splat lane 1 of a <2 x i32> across a <2 x i32>; a single lane-indexed DUP to
; .2s is expected.
525define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
526; CHECK-LABEL: test_vdup_lane_s32:
527; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
528  %splat = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
529  ret <2 x i32> %splat
530}
531
; Splat lane 5 of a 64-bit <8 x i8> across a 128-bit <16 x i8>; a single
; lane-indexed DUP to .16b is expected.
; The pattern names the "dup" mnemonic, matching the other dup tests in this
; file, so that an unrelated instruction with the same operands cannot satisfy
; it.
532define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
533; CHECK-LABEL: test_vdupq_lane_s8:
534; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
535  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
536  ret <16 x i8> %shuffle
537}
538
; Splat lane 2 of a 64-bit <4 x i16> across a 128-bit <8 x i16>; a single
; lane-indexed DUP to .8h is expected.
; The pattern names the "dup" mnemonic, matching the other dup tests in this
; file, so that an unrelated instruction with the same operands cannot satisfy
; it.
539define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
540; CHECK-LABEL: test_vdupq_lane_s16:
541; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
542  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
543  ret <8 x i16> %shuffle
544}
545
; Splat lane 1 of a 64-bit <2 x i32> across a 128-bit <4 x i32>; a single
; lane-indexed DUP to .4s is expected.
; The pattern names the "dup" mnemonic, matching the other dup tests in this
; file, so that an unrelated instruction with the same operands cannot satisfy
; it.
546define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
547; CHECK-LABEL: test_vdupq_lane_s32:
548; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
549  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
550  ret <4 x i32> %shuffle
551}
552
; Splat the only lane of a <1 x i64> across a <2 x i64>; a single lane-indexed
; DUP to .2d is expected.
; The pattern names the "dup" mnemonic, matching the other dup tests in this
; file, so that an unrelated instruction with the same operands cannot satisfy
; it.
553define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
554; CHECK-LABEL: test_vdupq_lane_s64:
555; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
556  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
557  ret <2 x i64> %shuffle
558}
559
; Splat lane 5 of a 128-bit <16 x i8> across a 64-bit <8 x i8>; a single
; lane-indexed DUP to .8b is expected.
560define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
561; CHECK-LABEL: test_vdup_laneq_s8:
562; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
563  %splat = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
564  ret <8 x i8> %splat
565}
566
; Splat lane 2 of a 128-bit <8 x i16> across a 64-bit <4 x i16>; a single
; lane-indexed DUP to .4h is expected.
567define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
568; CHECK-LABEL: test_vdup_laneq_s16:
569; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
570  %splat = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
571  ret <4 x i16> %splat
572}
573
; Splat lane 1 of a 128-bit <4 x i32> across a 64-bit <2 x i32>; a single
; lane-indexed DUP to .2s is expected.
574define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
575; CHECK-LABEL: test_vdup_laneq_s32:
576; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
577  %splat = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
578  ret <2 x i32> %splat
579}
580
; Splat lane 5 of a <16 x i8> across a <16 x i8>; a single lane-indexed DUP to
; .16b is expected.
581define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
582; CHECK-LABEL: test_vdupq_laneq_s8:
583; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
584  %splat = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
585  ret <16 x i8> %splat
586}
587
; Splat lane 2 of a <8 x i16> across a <8 x i16>; a single lane-indexed DUP to
; .8h is expected.
; The pattern names the "dup" mnemonic, as the s8/s32/s64 laneq variants in
; this file do, so that an unrelated instruction with the same operands cannot
; satisfy it.
588define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
589; CHECK-LABEL: test_vdupq_laneq_s16:
590; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
591  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
592  ret <8 x i16> %shuffle
593}
594
; Splat lane 1 of a <4 x i32> across a <4 x i32>; a single lane-indexed DUP to
; .4s is expected.
595define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
596; CHECK-LABEL: test_vdupq_laneq_s32:
597; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
598  %splat = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
599  ret <4 x i32> %splat
600}
601
; Splat lane 0 of a <2 x i64> across a <2 x i64>; a single lane-indexed DUP to
; .2d is expected.
602define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
603; CHECK-LABEL: test_vdupq_laneq_s64:
604; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
605  %splat = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
606  ret <2 x i64> %splat
607}
608
; Reinterpret a <8 x i8> as a scalar i64; an FMOV from a D register to an X
; register is expected.
609define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
610; CHECK-LABEL: test_bitcastv8i8toi64:
611   %cast = bitcast <8 x i8> %in to i64
612; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
613   ret i64 %cast
614}
615
; Reinterpret a <4 x i16> as a scalar i64; an FMOV from a D register to an X
; register is expected.
616define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
617; CHECK-LABEL: test_bitcastv4i16toi64:
618   %cast = bitcast <4 x i16> %in to i64
619; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
620   ret i64 %cast
621}
622
; Reinterpret a <2 x i32> as a scalar i64; an FMOV from a D register to an X
; register is expected.
623define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
624; CHECK-LABEL: test_bitcastv2i32toi64:
625   %cast = bitcast <2 x i32> %in to i64
626; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
627   ret i64 %cast
628}
629
; Reinterpret a <2 x float> as a scalar i64; an FMOV from a D register to an X
; register is expected.
630define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
631; CHECK-LABEL: test_bitcastv2f32toi64:
632   %cast = bitcast <2 x float> %in to i64
633; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
634   ret i64 %cast
635}
636
; Reinterpret a <1 x i64> as a scalar i64; an FMOV from a D register to an X
; register is expected.
637define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
638; CHECK-LABEL: test_bitcastv1i64toi64:
639   %cast = bitcast <1 x i64> %in to i64
640; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
641   ret i64 %cast
642}
643
; Reinterpret a <1 x double> as a scalar i64; an FMOV from a D register to an X
; register is expected.
644define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
645; CHECK-LABEL: test_bitcastv1f64toi64:
646   %cast = bitcast <1 x double> %in to i64
647; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
648   ret i64 %cast
649}
650
; Reinterpret a scalar i64 as a <8 x i8>; an FMOV from an X register to a D
; register is expected.
651define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
652; CHECK-LABEL: test_bitcasti64tov8i8:
653   %cast = bitcast i64 %in to <8 x i8>
654; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
655   ret <8 x i8> %cast
656}
657
; Reinterpret a scalar i64 as a <4 x i16>; an FMOV from an X register to a D
; register is expected.
658define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
659; CHECK-LABEL: test_bitcasti64tov4i16:
660   %cast = bitcast i64 %in to <4 x i16>
661; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
662   ret <4 x i16> %cast
663}
664
; Reinterpret a scalar i64 as a <2 x i32>; an FMOV from an X register to a D
; register is expected.
665define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
666; CHECK-LABEL: test_bitcasti64tov2i32:
667   %cast = bitcast i64 %in to <2 x i32>
668; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
669   ret <2 x i32> %cast
670}
671
; Reinterpret a scalar i64 as a <2 x float>; an FMOV from an X register to a D
; register is expected.
672define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
673; CHECK-LABEL: test_bitcasti64tov2f32:
674   %cast = bitcast i64 %in to <2 x float>
675; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
676   ret <2 x float> %cast
677}
678
; Reinterpret a scalar i64 as a <1 x i64>; an FMOV from an X register to a D
; register is expected.
679define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
680; CHECK-LABEL: test_bitcasti64tov1i64:
681   %cast = bitcast i64 %in to <1 x i64>
682; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
683   ret <1 x i64> %cast
684}
685
; Reinterpret a scalar i64 as a <1 x double>; an FMOV from an X register to a D
; register is expected.
686define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
687; CHECK-LABEL: test_bitcasti64tov1f64:
688   %cast = bitcast i64 %in to <1 x double>
689; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
690   ret <1 x double> %cast
691}
692
; Negate a <8 x i8>, reinterpret the bits as <1 x double>, then convert to a
; signed <1 x i64>. A vector NEG on .8b immediately followed by FCVTZS is
; expected.
693define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
694; CHECK-LABEL: test_bitcastv8i8tov1f64:
695; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
696; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
697  %neg = sub <8 x i8> zeroinitializer, %a
698  %cast = bitcast <8 x i8> %neg to <1 x double>
699  %cvt = fptosi <1 x double> %cast to <1 x i64>
700  ret <1 x i64> %cvt
701}
702
; Negate a <4 x i16>, reinterpret the bits as <1 x double>, then convert to a
; signed <1 x i64>. A vector NEG on .4h immediately followed by FCVTZS is
; expected.
703define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
704; CHECK-LABEL: test_bitcastv4i16tov1f64:
705; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
706; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
707  %neg = sub <4 x i16> zeroinitializer, %a
708  %cast = bitcast <4 x i16> %neg to <1 x double>
709  %cvt = fptosi <1 x double> %cast to <1 x i64>
710  ret <1 x i64> %cvt
711}
712
; Negate a <2 x i32>, reinterpret the bits as <1 x double>, then convert to a
; signed <1 x i64>. A vector NEG on .2s immediately followed by FCVTZS is
; expected.
713define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
714; CHECK-LABEL: test_bitcastv2i32tov1f64:
715; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
716; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
717  %neg = sub <2 x i32> zeroinitializer, %a
718  %cast = bitcast <2 x i32> %neg to <1 x double>
719  %cvt = fptosi <1 x double> %cast to <1 x i64>
720  ret <1 x i64> %cvt
721}
722
; Negate a <1 x i64>, reinterpret the bits as <1 x double>, then convert to a
; signed <1 x i64>. A scalar NEG on a D register immediately followed by FCVTZS
; is expected.
723define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
724; CHECK-LABEL: test_bitcastv1i64tov1f64:
725; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
726; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
727  %neg = sub <1 x i64> zeroinitializer, %a
728  %cast = bitcast <1 x i64> %neg to <1 x double>
729  %cvt = fptosi <1 x double> %cast to <1 x i64>
730  ret <1 x i64> %cvt
731}
732
; Negate a <2 x float> (fsub from -0.0), reinterpret the bits as <1 x double>,
; then convert to a signed <1 x i64>. A vector FNEG on .2s immediately followed
; by FCVTZS is expected.
733define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
734; CHECK-LABEL: test_bitcastv2f32tov1f64:
735; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
736; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
737  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
738  %cast = bitcast <2 x float> %neg to <1 x double>
739  %cvt = fptosi <1 x double> %cast to <1 x i64>
740  ret <1 x i64> %cvt
741}
742
; Convert a signed <1 x i64> to <1 x double>, reinterpret the bits as
; <8 x i8>, then negate. SCVTF immediately followed by a vector NEG on .8b is
; expected.
743define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
744; CHECK-LABEL: test_bitcastv1f64tov8i8:
745; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
746; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
747  %cvt = sitofp <1 x i64> %a to <1 x double>
748  %cast = bitcast <1 x double> %cvt to <8 x i8>
749  %neg = sub <8 x i8> zeroinitializer, %cast
750  ret <8 x i8> %neg
751}
752
; Convert a signed <1 x i64> to <1 x double>, reinterpret the bits as
; <4 x i16>, then negate. SCVTF immediately followed by a vector NEG on .4h is
; expected.
753define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
754; CHECK-LABEL: test_bitcastv1f64tov4i16:
755; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
756; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
757  %cvt = sitofp <1 x i64> %a to <1 x double>
758  %cast = bitcast <1 x double> %cvt to <4 x i16>
759  %neg = sub <4 x i16> zeroinitializer, %cast
760  ret <4 x i16> %neg
761}
762
; Convert a signed <1 x i64> to <1 x double>, reinterpret the bits as
; <2 x i32>, then negate. SCVTF immediately followed by a vector NEG on .2s is
; expected.
763define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
764; CHECK-LABEL: test_bitcastv1f64tov2i32:
765; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
766; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
767  %cvt = sitofp <1 x i64> %a to <1 x double>
768  %cast = bitcast <1 x double> %cvt to <2 x i32>
769  %neg = sub <2 x i32> zeroinitializer, %cast
770  ret <2 x i32> %neg
771}
772
; Convert a signed <1 x i64> to <1 x double>, reinterpret the bits as
; <1 x i64>, then negate. SCVTF immediately followed by a scalar NEG on a D
; register is expected.
773define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
774; CHECK-LABEL: test_bitcastv1f64tov1i64:
775; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
776; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
777  %cvt = sitofp <1 x i64> %a to <1 x double>
778  %cast = bitcast <1 x double> %cvt to <1 x i64>
779  %neg = sub <1 x i64> zeroinitializer, %cast
780  ret <1 x i64> %neg
781}
782
; Convert a signed <1 x i64> to <1 x double>, reinterpret the bits as
; <2 x float>, then negate (fsub from -0.0). SCVTF immediately followed by a
; vector FNEG on .2s is expected.
783define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
784; CHECK-LABEL: test_bitcastv1f64tov2f32:
785; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
786; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
787  %cvt = sitofp <1 x i64> %a to <1 x double>
788  %cast = bitcast <1 x double> %cvt to <2 x float>
789  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %cast
790  ret <2 x float> %neg
791}
792
793; Test insert element into an undef vector
; Place an i8 in lane 0 of an otherwise-undef <8 x i8>; an FMOV from a W
; register to an S register is expected.
794define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
795; CHECK-LABEL: scalar_to_vector.v8i8:
796; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
797  %vec = insertelement <8 x i8> undef, i8 %a, i32 0
798  ret <8 x i8> %vec
799}
800
; Place an i8 in lane 0 of an otherwise-undef <16 x i8>; an FMOV from a W
; register to an S register is expected.
801define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
802; CHECK-LABEL: scalar_to_vector.v16i8:
803; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
804  %vec = insertelement <16 x i8> undef, i8 %a, i32 0
805  ret <16 x i8> %vec
806}
807
; Place an i16 in lane 0 of an otherwise-undef <4 x i16>; an FMOV from a W
; register to an S register is expected.
808define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
809; CHECK-LABEL: scalar_to_vector.v4i16:
810; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
811  %vec = insertelement <4 x i16> undef, i16 %a, i32 0
812  ret <4 x i16> %vec
813}
814
; Place an i16 in lane 0 of an otherwise-undef <8 x i16>; an FMOV from a W
; register to an S register is expected.
815define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
816; CHECK-LABEL: scalar_to_vector.v8i16:
817; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
818  %vec = insertelement <8 x i16> undef, i16 %a, i32 0
819  ret <8 x i16> %vec
820}
821
; Place an i32 in lane 0 of an otherwise-undef <2 x i32>; an FMOV from a W
; register to an S register is expected.
822define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
823; CHECK-LABEL: scalar_to_vector.v2i32:
824; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
825  %vec = insertelement <2 x i32> undef, i32 %a, i32 0
826  ret <2 x i32> %vec
827}
828
; Place an i32 in lane 0 of an otherwise-undef <4 x i32>; an FMOV from a W
; register to an S register is expected.
829define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
830; CHECK-LABEL: scalar_to_vector.v4i32:
831; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
832  %vec = insertelement <4 x i32> undef, i32 %a, i32 0
833  ret <4 x i32> %vec
834}
835
; Place an i64 in lane 0 of an otherwise-undef <2 x i64>; an FMOV from an X
; register to a D register is expected.
836define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
837; CHECK-LABEL: scalar_to_vector.v2i64:
838; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
839  %vec = insertelement <2 x i64> undef, i64 %a, i32 0
840  ret <2 x i64> %vec
841}
842
; Extract the only lane of a <1 x i8> and insert it into all eight lanes of a
; <8 x i8>; a lane-indexed DUP from v0.b[0] into v0.8b is expected.
843define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
844; CHECK-LABEL: testDUP.v1i8:
845; CHECK: dup v0.8b, v0.b[0]
846  %elt = extractelement <1 x i8> %a, i32 0
847  %lane0 = insertelement <8 x i8> undef, i8 %elt, i32 0
848  %lane1 = insertelement <8 x i8> %lane0, i8 %elt, i32 1
849  %lane2 = insertelement <8 x i8> %lane1, i8 %elt, i32 2
850  %lane3 = insertelement <8 x i8> %lane2, i8 %elt, i32 3
851  %lane4 = insertelement <8 x i8> %lane3, i8 %elt, i32 4
852  %lane5 = insertelement <8 x i8> %lane4, i8 %elt, i32 5
853  %lane6 = insertelement <8 x i8> %lane5, i8 %elt, i32 6
854  %lane7 = insertelement <8 x i8> %lane6, i8 %elt, i32 7
855  ret <8 x i8> %lane7
856}
857
; Extract the only lane of a <1 x i16> and insert it into all eight lanes of a
; <8 x i16>; a lane-indexed DUP from v0.h[0] into v0.8h is expected.
858define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
859; CHECK-LABEL: testDUP.v1i16:
860; CHECK: dup v0.8h, v0.h[0]
861  %elt = extractelement <1 x i16> %a, i32 0
862  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
863  %lane1 = insertelement <8 x i16> %lane0, i16 %elt, i32 1
864  %lane2 = insertelement <8 x i16> %lane1, i16 %elt, i32 2
865  %lane3 = insertelement <8 x i16> %lane2, i16 %elt, i32 3
866  %lane4 = insertelement <8 x i16> %lane3, i16 %elt, i32 4
867  %lane5 = insertelement <8 x i16> %lane4, i16 %elt, i32 5
868  %lane6 = insertelement <8 x i16> %lane5, i16 %elt, i32 6
869  %lane7 = insertelement <8 x i16> %lane6, i16 %elt, i32 7
870  ret <8 x i16> %lane7
871}
872
; Reads the only element of <1 x i32> %a and writes it to all four lanes of a
; <4 x i32> through a chain of insertelement instructions.
; The test expects the chain to be selected as `dup v0.4s, v0.s[0]`.
873define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
874; CHECK-LABEL: testDUP.v1i32:
875; CHECK: dup v0.4s, v0.s[0]
876  %b = extractelement <1 x i32> %a, i32 0
877  %c = insertelement <4 x i32> undef, i32 %b, i32 0
878  %d = insertelement <4 x i32> %c, i32 %b, i32 1
879  %e = insertelement <4 x i32> %d, i32 %b, i32 2
880  %f = insertelement <4 x i32> %e, i32 %b, i32 3
881  ret <4 x i32> %f
882}
883
; Copies lanes 0..7 of <16 x i8> %x, in order, into the eight lanes of an
; <8 x i8>, one extractelement/insertelement pair per lane.
; NOTE(review): the only pattern matched after the label is `ret`, so this test
; by itself does not assert which (if any) instructions precede the return.
884define <8 x i8> @getl(<16 x i8> %x) #0 {
885; CHECK-LABEL: getl:
886; CHECK: ret
887  %vecext = extractelement <16 x i8> %x, i32 0
888  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
889  %vecext1 = extractelement <16 x i8> %x, i32 1
890  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
891  %vecext3 = extractelement <16 x i8> %x, i32 2
892  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
893  %vecext5 = extractelement <16 x i8> %x, i32 3
894  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
895  %vecext7 = extractelement <16 x i8> %x, i32 4
896  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
897  %vecext9 = extractelement <16 x i8> %x, i32 5
898  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
899  %vecext11 = extractelement <16 x i8> %x, i32 6
900  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
901  %vecext13 = extractelement <16 x i8> %x, i32 7
902  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
903  ret <8 x i8> %vecinit14
904}
905
; Takes lane 1 of <2 x i32> %a, truncates it to i16 and writes the result to
; every lane of a <4 x i16>.
; The test expects a lane-indexed dup that reads 16-bit lane 2 of v0, i.e.
; twice the 32-bit source lane index.
906define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
907; CHECK-LABEL: test_dup_v2i32_v4i16:
908; CHECK: dup v0.4h, v0.h[2]
909entry:
910  %x = extractelement <2 x i32> %a, i32 1
911  %vget_lane = trunc i32 %x to i16
912  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
913  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
914  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
915  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
916  ret <4 x i16> %vecinit3.i
917}
918
; Takes lane 3 of <4 x i32> %a, truncates it to i16 and writes the result to
; every lane of an <8 x i16>.
; The test expects a lane-indexed dup that reads 16-bit lane 6 of v0, i.e.
; twice the 32-bit source lane index.
919define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
920; CHECK-LABEL: test_dup_v4i32_v8i16:
921; CHECK: dup v0.8h, v0.h[6]
922entry:
923  %x = extractelement <4 x i32> %a, i32 3
924  %vget_lane = trunc i32 %x to i16
925  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
926  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
927  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
928  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
929  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
930  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
931  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
932  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
933  ret <8 x i16> %vecinit7.i
934}
935
; Takes the only element of <1 x i64> %a, truncates it to i16 and writes the
; result to every lane of a <4 x i16>.
; The test expects a lane-indexed dup that reads 16-bit lane 0 of v0.
936define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
937; CHECK-LABEL: test_dup_v1i64_v4i16:
938; CHECK: dup v0.4h, v0.h[0]
939entry:
940  %x = extractelement <1 x i64> %a, i32 0
941  %vget_lane = trunc i64 %x to i16
942  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
943  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
944  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
945  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
946  ret <4 x i16> %vecinit3.i
947}
948
; Takes the only element of <1 x i64> %a, truncates it to i32 and writes the
; result to both lanes of a <2 x i32>.
; The test expects a lane-indexed dup that reads 32-bit lane 0 of v0.
949define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
950; CHECK-LABEL: test_dup_v1i64_v2i32:
951; CHECK: dup v0.2s, v0.s[0]
952entry:
953  %x = extractelement <1 x i64> %a, i32 0
954  %vget_lane = trunc i64 %x to i32
955  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
956  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
957  ret <2 x i32> %vecinit1.i
958}
959
; Takes lane 1 of <2 x i64> %a, truncates it to i16 and writes the result to
; every lane of an <8 x i16>.
; The test expects a lane-indexed dup that reads 16-bit lane 4 of v0, i.e.
; four times the 64-bit source lane index.
960define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
961; CHECK-LABEL: test_dup_v2i64_v8i16:
962; CHECK: dup v0.8h, v0.h[4]
963entry:
964  %x = extractelement <2 x i64> %a, i32 1
965  %vget_lane = trunc i64 %x to i16
966  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
967  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
968  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
969  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
970  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
971  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
972  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
973  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
974  ret <8 x i16> %vecinit7.i
975}
976
; Takes lane 1 of <2 x i64> %a, truncates it to i32 and writes the result to
; every lane of a <4 x i32>.
; The test expects a lane-indexed dup that reads 32-bit lane 2 of v0, i.e.
; twice the 64-bit source lane index.
977define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
978; CHECK-LABEL: test_dup_v2i64_v4i32:
979; CHECK: dup v0.4s, v0.s[2]
980entry:
981  %x = extractelement <2 x i64> %a, i32 1
982  %vget_lane = trunc i64 %x to i32
983  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
984  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
985  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
986  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
987  ret <4 x i32> %vecinit3.i
988}
989
; Takes lane 1 of the 128-bit <4 x i32> %a, truncates it to i16 and writes the
; result to every lane of a 64-bit <4 x i16>.
; The test expects a lane-indexed dup that reads 16-bit lane 2 of v0, i.e.
; twice the 32-bit source lane index.
990define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
991; CHECK-LABEL: test_dup_v4i32_v4i16:
992; CHECK: dup v0.4h, v0.h[2]
993entry:
994  %x = extractelement <4 x i32> %a, i32 1
995  %vget_lane = trunc i32 %x to i16
996  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
997  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
998  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
999  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1000  ret <4 x i16> %vecinit3.i
1001}
1002
; Takes lane 0 of the 128-bit <2 x i64> %a, truncates it to i16 and writes the
; result to every lane of a 64-bit <4 x i16>.
; The test expects a lane-indexed dup that reads 16-bit lane 0 of v0.
1003define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1004; CHECK-LABEL: test_dup_v2i64_v4i16:
1005; CHECK: dup v0.4h, v0.h[0]
1006entry:
1007  %x = extractelement <2 x i64> %a, i32 0
1008  %vget_lane = trunc i64 %x to i16
1009  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1010  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1011  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1012  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1013  ret <4 x i16> %vecinit3.i
1014}
1015
; Takes lane 0 of the 128-bit <2 x i64> %a, truncates it to i32 and writes the
; result to both lanes of a 64-bit <2 x i32>.
; The test expects a lane-indexed dup that reads 32-bit lane 0 of v0.
1016define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1017; CHECK-LABEL: test_dup_v2i64_v2i32:
1018; CHECK: dup v0.2s, v0.s[0]
1019entry:
1020  %x = extractelement <2 x i64> %a, i32 0
1021  %vget_lane = trunc i64 %x to i32
1022  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1023  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1024  ret <2 x i32> %vecinit1.i
1025}
1026
1027
; Calls the fmaxv intrinsic on <2 x float> %a, round-trips the float result
; through a <1 x float> insert/extract pair, then places it in lane 0 of an
; otherwise-undef <2 x float>.
; The test expects an fmaxp writing an s register with `ret` on the very next
; line, so no additional instruction may be emitted between the two.
1028define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1029; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1030; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1031; CHECK-NEXT: ret
1032entry:
1033  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1034  %1 = insertelement <1 x float> undef, float %0, i32 0
1035  %2 = extractelement <1 x float> %1, i32 0
1036  %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1037  ret <2 x float> %vecinit1.i
1038}
1039
; Calls the fmaxv intrinsic on <2 x float> %a, round-trips the float result
; through a <1 x float> insert/extract pair, then places it in lane 0 of an
; otherwise-undef <4 x float>.
; The test expects an fmaxp writing an s register with `ret` on the very next
; line, so no additional instruction may be emitted between the two.
1040define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1041; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1042; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1043; CHECK-NEXT: ret
1044entry:
1045  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1046  %1 = insertelement <1 x float> undef, float %0, i32 0
1047  %2 = extractelement <1 x float> %1, i32 0
1048  %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1049  ret <4 x float> %vecinit1.i
1050}
1051
1052declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
1053
; Reads lane 0 of <2 x i32> %a and writes it to lane 1 of an undef <2 x i32>,
; leaving result lane 0 undefined.
; The test expects a lane-indexed dup of 32-bit lane 0 across a .2s vector.
1054define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1055; CHECK-LABEL: test_concat_undef_v1i32:
1056; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
1057entry:
1058  %0 = extractelement <2 x i32> %a, i32 0
1059  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1060  ret <2 x i32> %vecinit1.i
1061}
1062
1063declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
1064
; Applies the scalar sqabs intrinsic to i32 %a and places the result in lane 0
; of an otherwise-undef <2 x i32>.
; The test expects an s-register sqabs with `ret` on the very next line, so no
; additional instruction may be emitted between the two.
1065define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1066; CHECK-LABEL: test_concat_v1i32_undef:
1067; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1068; CHECK-NEXT: ret
1069entry:
1070  %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1071  %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1072  ret <2 x i32> %vecinit.i432
1073}
1074
; Reads lane 0 of <2 x i32> %a and writes it to both lanes of the result.
; The test expects a lane-indexed dup of 32-bit lane 0 across a .2s vector.
1075define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1076; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1077; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
1078entry:
1079  %0 = extractelement <2 x i32> %a, i32 0
1080  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1081  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1082  ret <2 x i32> %vecinit1.i
1083}
1084
; Applies the scalar sqabs intrinsic to %a and to %b, puts each result in lane 0
; of its own undef <2 x i32>, then combines them with a shuffle whose mask
; <0, 2> selects lane 0 of the first vector and lane 0 of the second.
; The test expects two s-register sqabs instructions followed by an ins into
; 32-bit lane 1 from a w register.
1085define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1086; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1087; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1088; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1089; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
1090entry:
1091  %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1092  %d = insertelement <2 x i32> undef, i32 %c, i32 0
1093  %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1094  %f = insertelement <2 x i32> undef, i32 %e, i32 0
1095  %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
1096  ret <2 x i32> %h
1097}
1098
; Builds a <16 x i8> from elements 0..7 of %x followed by elements 0..7 of %y
; (shuffle mask indices 16..23 address the second operand).
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1099define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1100; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1101; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1102entry:
1103  %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1104  ret <16 x i8> %vecinit30
1105}
1106
; Copies all eight elements of <8 x i8> %x into lanes 0..7 of a <16 x i8> one
; element at a time, then shuffles that with %y so the result is those eight
; lanes followed by elements 0..7 of %y (mask indices 16..23).
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1107define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1108; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1109; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1110entry:
1111  %vecext = extractelement <8 x i8> %x, i32 0
1112  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1113  %vecext1 = extractelement <8 x i8> %x, i32 1
1114  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1115  %vecext3 = extractelement <8 x i8> %x, i32 2
1116  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1117  %vecext5 = extractelement <8 x i8> %x, i32 3
1118  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1119  %vecext7 = extractelement <8 x i8> %x, i32 4
1120  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1121  %vecext9 = extractelement <8 x i8> %x, i32 5
1122  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1123  %vecext11 = extractelement <8 x i8> %x, i32 6
1124  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1125  %vecext13 = extractelement <8 x i8> %x, i32 7
1126  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1127  %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1128  ret <16 x i8> %vecinit30
1129}
1130
; Builds a <16 x i8> one element at a time: lanes 0..7 come from elements 0..7
; of the <16 x i8> %x, lanes 8..15 come from elements 0..7 of the <8 x i8> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1131define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1132; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1133; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1134entry:
1135  %vecext = extractelement <16 x i8> %x, i32 0
1136  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1137  %vecext1 = extractelement <16 x i8> %x, i32 1
1138  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1139  %vecext3 = extractelement <16 x i8> %x, i32 2
1140  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1141  %vecext5 = extractelement <16 x i8> %x, i32 3
1142  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1143  %vecext7 = extractelement <16 x i8> %x, i32 4
1144  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1145  %vecext9 = extractelement <16 x i8> %x, i32 5
1146  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1147  %vecext11 = extractelement <16 x i8> %x, i32 6
1148  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1149  %vecext13 = extractelement <16 x i8> %x, i32 7
1150  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1151  %vecext15 = extractelement <8 x i8> %y, i32 0
1152  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1153  %vecext17 = extractelement <8 x i8> %y, i32 1
1154  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1155  %vecext19 = extractelement <8 x i8> %y, i32 2
1156  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1157  %vecext21 = extractelement <8 x i8> %y, i32 3
1158  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1159  %vecext23 = extractelement <8 x i8> %y, i32 4
1160  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1161  %vecext25 = extractelement <8 x i8> %y, i32 5
1162  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1163  %vecext27 = extractelement <8 x i8> %y, i32 6
1164  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1165  %vecext29 = extractelement <8 x i8> %y, i32 7
1166  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1167  ret <16 x i8> %vecinit30
1168}
1169
; Builds a <16 x i8> one element at a time: lanes 0..7 are the eight elements
; of <8 x i8> %x, lanes 8..15 are the eight elements of <8 x i8> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1170define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1171; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1172; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1173entry:
1174  %vecext = extractelement <8 x i8> %x, i32 0
1175  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1176  %vecext1 = extractelement <8 x i8> %x, i32 1
1177  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1178  %vecext3 = extractelement <8 x i8> %x, i32 2
1179  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1180  %vecext5 = extractelement <8 x i8> %x, i32 3
1181  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1182  %vecext7 = extractelement <8 x i8> %x, i32 4
1183  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1184  %vecext9 = extractelement <8 x i8> %x, i32 5
1185  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1186  %vecext11 = extractelement <8 x i8> %x, i32 6
1187  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1188  %vecext13 = extractelement <8 x i8> %x, i32 7
1189  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1190  %vecext15 = extractelement <8 x i8> %y, i32 0
1191  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1192  %vecext17 = extractelement <8 x i8> %y, i32 1
1193  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1194  %vecext19 = extractelement <8 x i8> %y, i32 2
1195  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1196  %vecext21 = extractelement <8 x i8> %y, i32 3
1197  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1198  %vecext23 = extractelement <8 x i8> %y, i32 4
1199  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1200  %vecext25 = extractelement <8 x i8> %y, i32 5
1201  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1202  %vecext27 = extractelement <8 x i8> %y, i32 6
1203  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1204  %vecext29 = extractelement <8 x i8> %y, i32 7
1205  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1206  ret <16 x i8> %vecinit30
1207}
1208
; Builds an <8 x i16> from elements 0..3 of %x followed by elements 0..3 of %y
; (shuffle mask indices 8..11 address the second operand).
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1209define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1210; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1211; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1212entry:
1213  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1214  ret <8 x i16> %vecinit14
1215}
1216
; Copies all four elements of <4 x i16> %x into lanes 0..3 of an <8 x i16> one
; element at a time, then shuffles that with %y so the result is those four
; lanes followed by elements 0..3 of %y (mask indices 8..11).
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1217define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1218; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1219; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1220entry:
1221  %vecext = extractelement <4 x i16> %x, i32 0
1222  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1223  %vecext1 = extractelement <4 x i16> %x, i32 1
1224  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1225  %vecext3 = extractelement <4 x i16> %x, i32 2
1226  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1227  %vecext5 = extractelement <4 x i16> %x, i32 3
1228  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1229  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1230  ret <8 x i16> %vecinit14
1231}
1232
; Builds an <8 x i16> one element at a time: lanes 0..3 come from elements 0..3
; of the <8 x i16> %x, lanes 4..7 come from elements 0..3 of the <4 x i16> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1233define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1234; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1235; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1236entry:
1237  %vecext = extractelement <8 x i16> %x, i32 0
1238  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1239  %vecext1 = extractelement <8 x i16> %x, i32 1
1240  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1241  %vecext3 = extractelement <8 x i16> %x, i32 2
1242  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1243  %vecext5 = extractelement <8 x i16> %x, i32 3
1244  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1245  %vecext7 = extractelement <4 x i16> %y, i32 0
1246  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1247  %vecext9 = extractelement <4 x i16> %y, i32 1
1248  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1249  %vecext11 = extractelement <4 x i16> %y, i32 2
1250  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1251  %vecext13 = extractelement <4 x i16> %y, i32 3
1252  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1253  ret <8 x i16> %vecinit14
1254}
1255
; Builds an <8 x i16> one element at a time: lanes 0..3 are the four elements
; of <4 x i16> %x, lanes 4..7 are the four elements of <4 x i16> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1256define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1257; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1258; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1259entry:
1260  %vecext = extractelement <4 x i16> %x, i32 0
1261  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1262  %vecext1 = extractelement <4 x i16> %x, i32 1
1263  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1264  %vecext3 = extractelement <4 x i16> %x, i32 2
1265  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1266  %vecext5 = extractelement <4 x i16> %x, i32 3
1267  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1268  %vecext7 = extractelement <4 x i16> %y, i32 0
1269  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1270  %vecext9 = extractelement <4 x i16> %y, i32 1
1271  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1272  %vecext11 = extractelement <4 x i16> %y, i32 2
1273  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1274  %vecext13 = extractelement <4 x i16> %y, i32 3
1275  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1276  ret <8 x i16> %vecinit14
1277}
1278
; Builds a <4 x i32> from elements 0..1 of %x followed by elements 0..1 of %y
; (shuffle mask indices 4..5 address the second operand).
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1279define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1280; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1281; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1282entry:
1283  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1284  ret <4 x i32> %vecinit6
1285}
1286
; Copies both elements of <2 x i32> %x into lanes 0..1 of a <4 x i32>, then
; shuffles that with %y so the result is those two lanes followed by elements
; 0..1 of %y (mask indices 4..5).
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1287define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1288; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1289; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1290entry:
1291  %vecext = extractelement <2 x i32> %x, i32 0
1292  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1293  %vecext1 = extractelement <2 x i32> %x, i32 1
1294  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1295  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1296  ret <4 x i32> %vecinit6
1297}
1298
; Builds a <4 x i32> one element at a time: lanes 0..1 come from elements 0..1
; of the <4 x i32> %x, lanes 2..3 are the two elements of the <2 x i32> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1299define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1300; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1301; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1302entry:
1303  %vecext = extractelement <4 x i32> %x, i32 0
1304  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1305  %vecext1 = extractelement <4 x i32> %x, i32 1
1306  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1307  %vecext3 = extractelement <2 x i32> %y, i32 0
1308  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1309  %vecext5 = extractelement <2 x i32> %y, i32 1
1310  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1311  ret <4 x i32> %vecinit6
1312}
1313
; Concatenates two <2 x i32> vectors into a <4 x i32> with a single shuffle
; whose mask <0, 1, 2, 3> selects both elements of %x and then both of %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1314define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1315; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1316; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1317entry:
1318  %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1319  ret <4 x i32> %vecinit6
1320}
1321
; Builds a <2 x i64> from element 0 of %x and element 0 of %y (shuffle mask
; index 2 addresses the first element of the second operand).
; The test expects a zip1 on .2d operands here, whereas the element-wise
; variants test_concat_v2i64_v2i64_v1i64 / _v1i64_v1i64 expect an ins.
1322define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1323; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1324; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1325entry:
1326  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1327  ret <2 x i64> %vecinit2
1328}
1329
; Places the only element of <1 x i64> %x in lane 0 of a <2 x i64>, then
; shuffles that with %y using mask <0, 2>, giving that element followed by
; element 0 of %y.
; The test expects a zip1 on .2d operands.
1330define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1331; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1332; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1333entry:
1334  %vecext = extractelement <1 x i64> %x, i32 0
1335  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1336  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1337  ret <2 x i64> %vecinit2
1338}
1339
; Builds a <2 x i64> element by element: lane 0 is element 0 of the <2 x i64>
; %x, lane 1 is the only element of the <1 x i64> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1340define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1341; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1342; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1343entry:
1344  %vecext = extractelement <2 x i64> %x, i32 0
1345  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1346  %vecext1 = extractelement <1 x i64> %y, i32 0
1347  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1348  ret <2 x i64> %vecinit2
1349}
1350
; Builds a <2 x i64> element by element: lane 0 is the only element of
; <1 x i64> %x, lane 1 is the only element of <1 x i64> %y.
; The test expects an ins that copies 64-bit lane 0 of one vector register into
; 64-bit lane 1 of another.
1351define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1352; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1353; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1354entry:
1355  %vecext = extractelement <1 x i64> %x, i32 0
1356  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1357  %vecext1 = extractelement <1 x i64> %y, i32 0
1358  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1359  ret <2 x i64> %vecinit2
1360}
1361
1362
; Widens a constant-zero <1 x i16> to <4 x i16> with an all-zero shuffle mask,
; so every result lane is element 0 of the first operand (zero).
; The test expects the 64-bit result to be materialised as `movi dN, #0`.
1363define <4 x i16> @concat_vector_v4i16_const() {
1364; CHECK-LABEL: concat_vector_v4i16_const:
1365; CHECK: movi {{d[0-9]+}}, #0
1366 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
1367 ret <4 x i16> %r
1368}
1369
; Widens the constant <1 x i16> <1> to <4 x i16> with an all-zero shuffle mask,
; so every result lane is the constant 1.
; The test expects a movi with a .4h arrangement and immediate #0x1.
1370define <4 x i16> @concat_vector_v4i16_const_one() {
1371; CHECK-LABEL: concat_vector_v4i16_const_one:
1372; CHECK: movi {{v[0-9]+}}.4h, #0x1
1373 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
1374 ret <4 x i16> %r
1375}
1376
; Widens a constant-zero <1 x i32> to <4 x i32> with an all-zero shuffle mask,
; so every result lane is zero.
; The test expects the 128-bit result to be materialised as `movi vN.2d, #0`.
1377define <4 x i32> @concat_vector_v4i32_const() {
1378; CHECK-LABEL: concat_vector_v4i32_const:
1379; CHECK: movi {{v[0-9]+}}.2d, #0
1380 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
1381 ret <4 x i32> %r
1382}
1383
; Widens a constant-zero <1 x i8> to <8 x i8> with an all-zero shuffle mask,
; so every result lane is zero.
; The test expects the 64-bit result to be materialised as `movi dN, #0`.
1384define <8 x i8> @concat_vector_v8i8_const() {
1385; CHECK-LABEL: concat_vector_v8i8_const:
1386; CHECK: movi {{d[0-9]+}}, #0
1387 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
1388 ret <8 x i8> %r
1389}
1390
; Widens a constant-zero <1 x i16> to <8 x i16> with an all-zero shuffle mask,
; so every result lane is zero.
; The test expects the 128-bit result to be materialised as `movi vN.2d, #0`.
1391define <8 x i16> @concat_vector_v8i16_const() {
1392; CHECK-LABEL: concat_vector_v8i16_const:
1393; CHECK: movi {{v[0-9]+}}.2d, #0
1394 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
1395 ret <8 x i16> %r
1396}
1397
; Widens the constant <1 x i16> <1> to <8 x i16> with an all-zero shuffle mask,
; so every result lane is the constant 1.
; The test expects a movi with a .8h arrangement and immediate #0x1.
1398define <8 x i16> @concat_vector_v8i16_const_one() {
1399; CHECK-LABEL: concat_vector_v8i16_const_one:
1400; CHECK: movi {{v[0-9]+}}.8h, #0x1
1401 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
1402 ret <8 x i16> %r
1403}
1404
; Widens a constant-zero <1 x i8> to <16 x i8> with an all-zero shuffle mask,
; so every result lane is zero.
; The test expects the 128-bit result to be materialised as `movi vN.2d, #0`.
1405define <16 x i8> @concat_vector_v16i8_const() {
1406; CHECK-LABEL: concat_vector_v16i8_const:
1407; CHECK: movi {{v[0-9]+}}.2d, #0
1408 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
1409 ret <16 x i8> %r
1410}
1411
; Widens the <1 x i16> argument %a to <4 x i16> with an all-zero shuffle mask,
; so every result lane is the single input element.
; The test expects `dup v0.4h, v0.h[0]`.
1412define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1413; CHECK-LABEL: concat_vector_v4i16:
1414; CHECK: dup v0.4h, v0.h[0]
1415 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
1416 ret <4 x i16> %r
1417}
1418
; Widens the <1 x i32> argument %a to <4 x i32> with an all-zero shuffle mask,
; so every result lane is the single input element.
; The test expects `dup v0.4s, v0.s[0]`.
1419define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1420; CHECK-LABEL: concat_vector_v4i32:
1421; CHECK: dup v0.4s, v0.s[0]
1422 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
1423 ret <4 x i32> %r
1424}
1425
; Widens the <1 x i8> argument %a to <8 x i8> with an all-zero shuffle mask,
; so every result lane is the single input element.
; The test expects `dup v0.8b, v0.b[0]`.
1426define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1427; CHECK-LABEL: concat_vector_v8i8:
1428; CHECK: dup v0.8b, v0.b[0]
1429 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
1430 ret <8 x i8> %r
1431}
1432
; Widens the <1 x i16> argument %a to <8 x i16> with an all-zero shuffle mask,
; so every result lane is the single input element.
; The test expects `dup v0.8h, v0.h[0]`.
1433define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1434; CHECK-LABEL: concat_vector_v8i16:
1435; CHECK: dup v0.8h, v0.h[0]
1436 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1437 ret <8 x i16> %r
1438}
1439
; Widens the <1 x i8> argument %a to <16 x i8> with an all-zero shuffle mask,
; so every result lane is the single input element.
; The test expects `dup v0.16b, v0.b[0]`.
1440define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1441; CHECK-LABEL: concat_vector_v16i8:
1442; CHECK: dup v0.16b, v0.b[0]
1443 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
1444 ret <16 x i8> %r
1445}
1446