1; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
2
3target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
4
5; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
6
; One 16-bit scalar viewed as <2 x i8> and repeated four times.
; The low half of %x is reinterpreted as a byte pair; the shuffle mask
; (0,1 repeated) replicates that pair across all eight lanes, so the expected
; lowering is a single halfword dup from w0.
; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
; CHECK-NEXT: dup.4h v0, w0
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
entry:
  %lo16 = trunc i32 %x to i16
  %pair = bitcast i16 %lo16 to <2 x i8>
  %splat = shufflevector <2 x i8> %pair, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i8> %splat
}
17
; One 32-bit scalar viewed as <4 x i8> and repeated twice.
; The mask (0,1,2,3 then 0,1,2,3) places the same four bytes in both halves of
; the <8 x i8> result, so the expected lowering is a single word dup from w0.
; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
; CHECK-NEXT: dup.2s v0, w0
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
entry:
  %quad = bitcast i32 %x to <4 x i8>
  %splat = shufflevector <4 x i8> %quad, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %splat
}
27
; One 32-bit scalar viewed as <2 x i16> and spread over a 128-bit vector.
; Mask indices 2 select from the second shuffle operand, which is undef, so
; result lanes 2 and 3 are unconstrained; every other lane pair is (0,1).
; The expected lowering is still a single word dup from w0.
; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
; CHECK-NEXT: dup.4s v0, w0
; CHECK-NEXT: ret
define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
entry:
  %pair = bitcast i32 %x to <2 x i16>
  %splat = shufflevector <2 x i16> %pair, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %splat
}
37
; Two 16-bit scalars, each viewed as <2 x i8>, combined as x, y, <undef>, y.
; Mask indices 0-1 pick the bytes of %x and 2-3 the bytes of %y; the third
; halfword of the result is left undef, so only halfword lanes 0, 1 and 3 are
; expected to be written.
; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
; CHECK-NEXT: ins.h v0[0], w0
; CHECK-NEXT: ins.h v0[1], w1
; CHECK-NEXT: ins.h v0[3], w1
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
entry:
  %x.lo16 = trunc i32 %x to i16
  %y.lo16 = trunc i32 %y to i16
  %x.bytes = bitcast i16 %x.lo16 to <2 x i8>
  %y.bytes = bitcast i16 %y.lo16 to <2 x i8>
  %merged = shufflevector <2 x i8> %x.bytes, <2 x i8> %y.bytes, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
  ret <8 x i8> %merged
}
52
; Two 32-bit scalars, each viewed as <4 x i8>, concatenated in swapped order.
; Mask indices 4-7 (the bytes of %y) fill the low half and 0-3 (the bytes of
; %x) fill the high half, so w1 is expected in word lane 0 and w0 in lane 1.
; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: ins.s v0[1], w0
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
entry:
  %x.bytes = bitcast i32 %x to <4 x i8>
  %y.bytes = bitcast i32 %y to <4 x i8>
  %merged = shufflevector <4 x i8> %x.bytes, <4 x i8> %y.bytes, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %merged
}
64
; Two 32-bit scalars, each viewed as <2 x i16>, laid out as x, y, y, x.
; Mask pairs (0,1) select %x and (2,3) select %y, giving word lanes
; 0..3 = w0, w1, w1, w0 in the <8 x i16> result.
; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ins.s v0[2], w1
; CHECK-NEXT: ins.s v0[3], w0
; CHECK-NEXT: ret
define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
entry:
  %x.halves = bitcast i32 %x to <2 x i16>
  %y.halves = bitcast i32 %y to <2 x i16>
  %merged = shufflevector <2 x i16> %x.halves, <2 x i16> %y.halves, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %merged
}
78
79; Also make sure we minimize bitcasts.
80
; Admittedly artificial: one integer scalar (%x, truncated to 16 bits) mixed
; with one floating-point scalar (%y). When any of the scalars is
; floating-point, the combine should bitcast everything to floating-point, so
; %x is expected to be moved into an FP register first and all four halfword
; inserts then take vector-register sources. The mask lays the lanes out as
; x, y, x, y.
; NOTE(review): %dummy is unused; presumably it only occupies the first FP
; argument register so that %y arrives in v1 - confirm.
; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
; CHECK-NEXT: ins.h v0[0], v[[X]][0]
; CHECK-NEXT: ins.h v0[1], v1[0]
; CHECK-NEXT: ins.h v0[2], v[[X]][0]
; CHECK-NEXT: ins.h v0[3], v1[0]
; CHECK-NEXT: ret
define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
entry:
  %x.lo16 = trunc i32 %x to i16
  %x.bytes = bitcast i16 %x.lo16 to <2 x i8>
  %y.bytes = bitcast half %y to <2 x i8>
  %merged = shufflevector <2 x i8> %x.bytes, <2 x i8> %y.bytes, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %merged
}
98
; Both scalars are floating-point (half), each viewed as <2 x i8>; the mask
; lays them out as x, y, x, y and the result is returned as <2 x float>.
; No general-purpose register should be involved: all four halfword inserts
; are expected to read directly from the vector registers holding the
; arguments.
; NOTE(review): %dummy is unused; presumably it only occupies the first FP
; argument register so that %x and %y arrive in v1 and v2 - confirm.
; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
; CHECK-NEXT: ins.h v0[0], v1[0]
; CHECK-NEXT: ins.h v0[1], v2[0]
; CHECK-NEXT: ins.h v0[2], v1[0]
; CHECK-NEXT: ins.h v0[3], v2[0]
; CHECK-NEXT: ret
define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
entry:
  %x.bytes = bitcast half %x to <2 x i8>
  %y.bytes = bitcast half %y to <2 x i8>
  %merged = shufflevector <2 x i8> %x.bytes, <2 x i8> %y.bytes, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %as.f32 = bitcast <8 x i8> %merged to <2 x float>
  ret <2 x float> %as.f32
}
113
; A single float scalar viewed as <2 x i16>, spread over a 128-bit vector and
; returned as <4 x float>. Mask indices 2 select from the undef second
; operand, so result lanes 2 and 3 are unconstrained. Because the source is
; already in an FP register, the expected lowering is a lane dup of v0[0]
; rather than a dup from a general-purpose register.
; NOTE(review): the function name says v16i8, but the shuffle result type is
; <8 x i16>; the name is kept as-is because the label check depends on it.
; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
; CHECK-NEXT: dup.4s v0, v0[0]
; CHECK-NEXT: ret
define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
entry:
  %pair = bitcast float %x to <2 x i16>
  %splat = shufflevector <2 x i16> %pair, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
  %as.f32 = bitcast <8 x i16> %splat to <4 x float>
  ret <4 x float> %as.f32
}
124
125attributes #0 = { nounwind }
126