1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE
4
; External callees used by the tests below. Each one takes a single 128-bit
; vector and returns a vector of the same type, using the arm_aapcs_vfpcc
; calling convention. Only declarations are provided in this file.
5declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
6declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
7declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)
8
; <4 x i32> case: a compare result (%c) is used by a select before a call to
; @ext_i32 and by a second select after it, so the predicate is live across
; the call. The expected output stores p0 to the stack (vstr p0) before the
; `bl` and loads it back (vldr p0) afterwards, then feeds it to the second
; vpsel. The big-endian output additionally has vrev64.32 on the incoming
; arguments, on the call operand/result, and on the return value.
; NOTE(review): presumably the spill is needed because the callee may clobber
; the predicate register - confirm against the MVE calling convention.
9define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
10; CHECK-LE-LABEL: shuffle1_v4i32:
11; CHECK-LE:       @ %bb.0: @ %entry
12; CHECK-LE-NEXT:    .save {r7, lr}
13; CHECK-LE-NEXT:    push {r7, lr}
14; CHECK-LE-NEXT:    .vsave {d8, d9}
15; CHECK-LE-NEXT:    vpush {d8, d9}
16; CHECK-LE-NEXT:    .pad #8
17; CHECK-LE-NEXT:    sub sp, #8
18; CHECK-LE-NEXT:    vcmp.i32 eq, q0, zr
19; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
20; CHECK-LE-NEXT:    vpsel q0, q1, q0
21; CHECK-LE-NEXT:    vmov q4, q1
22; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
23; CHECK-LE-NEXT:    bl ext_i32
24; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
25; CHECK-LE-NEXT:    vpsel q0, q4, q0
26; CHECK-LE-NEXT:    add sp, #8
27; CHECK-LE-NEXT:    vpop {d8, d9}
28; CHECK-LE-NEXT:    pop {r7, pc}
29;
30; CHECK-BE-LABEL: shuffle1_v4i32:
31; CHECK-BE:       @ %bb.0: @ %entry
32; CHECK-BE-NEXT:    .save {r7, lr}
33; CHECK-BE-NEXT:    push {r7, lr}
34; CHECK-BE-NEXT:    .vsave {d8, d9}
35; CHECK-BE-NEXT:    vpush {d8, d9}
36; CHECK-BE-NEXT:    .pad #8
37; CHECK-BE-NEXT:    sub sp, #8
38; CHECK-BE-NEXT:    vrev64.32 q4, q1
39; CHECK-BE-NEXT:    vrev64.32 q1, q0
40; CHECK-BE-NEXT:    vcmp.i32 eq, q1, zr
41; CHECK-BE-NEXT:    vmov.i32 q0, #0x0
42; CHECK-BE-NEXT:    vpsel q1, q4, q0
43; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
44; CHECK-BE-NEXT:    vrev64.32 q0, q1
45; CHECK-BE-NEXT:    bl ext_i32
46; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
47; CHECK-BE-NEXT:    vrev64.32 q1, q0
48; CHECK-BE-NEXT:    vpsel q1, q4, q1
49; CHECK-BE-NEXT:    vrev64.32 q0, q1
50; CHECK-BE-NEXT:    add sp, #8
51; CHECK-BE-NEXT:    vpop {d8, d9}
52; CHECK-BE-NEXT:    pop {r7, pc}
; IR under test: %c = (%src == 0) per lane; %s1 picks %a where %c is set and
; zero elsewhere; %s1 is passed to @ext_i32; the final select reuses %c to
; pick %a or the call result.
53entry:
54  %c = icmp eq <4 x i32> %src, zeroinitializer
55  %s1 = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
56  %ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %s1)
57  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
58  ret <4 x i32> %s
59}
60
; <8 x i16> case: a compare result (%c) is used by a select before a call to
; @ext_i16 and by a second select after it, so the predicate is live across
; the call. The expected output stores p0 to the stack (vstr p0) before the
; `bl` and loads it back (vldr p0) afterwards, then feeds it to the second
; vpsel. The big-endian output additionally has vrev64.16 on the incoming
; arguments, on the call operand/result, and on the return value, plus a
; vrev32.16 applied to the materialised zero vector.
; NOTE(review): presumably the spill is needed because the callee may clobber
; the predicate register - confirm against the MVE calling convention.
61define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
62; CHECK-LE-LABEL: shuffle1_v8i16:
63; CHECK-LE:       @ %bb.0: @ %entry
64; CHECK-LE-NEXT:    .save {r7, lr}
65; CHECK-LE-NEXT:    push {r7, lr}
66; CHECK-LE-NEXT:    .vsave {d8, d9}
67; CHECK-LE-NEXT:    vpush {d8, d9}
68; CHECK-LE-NEXT:    .pad #8
69; CHECK-LE-NEXT:    sub sp, #8
70; CHECK-LE-NEXT:    vcmp.i16 eq, q0, zr
71; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
72; CHECK-LE-NEXT:    vpsel q0, q1, q0
73; CHECK-LE-NEXT:    vmov q4, q1
74; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
75; CHECK-LE-NEXT:    bl ext_i16
76; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
77; CHECK-LE-NEXT:    vpsel q0, q4, q0
78; CHECK-LE-NEXT:    add sp, #8
79; CHECK-LE-NEXT:    vpop {d8, d9}
80; CHECK-LE-NEXT:    pop {r7, pc}
81;
82; CHECK-BE-LABEL: shuffle1_v8i16:
83; CHECK-BE:       @ %bb.0: @ %entry
84; CHECK-BE-NEXT:    .save {r7, lr}
85; CHECK-BE-NEXT:    push {r7, lr}
86; CHECK-BE-NEXT:    .vsave {d8, d9}
87; CHECK-BE-NEXT:    vpush {d8, d9}
88; CHECK-BE-NEXT:    .pad #8
89; CHECK-BE-NEXT:    sub sp, #8
90; CHECK-BE-NEXT:    vrev64.16 q4, q1
91; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
92; CHECK-BE-NEXT:    vrev64.16 q2, q0
93; CHECK-BE-NEXT:    vrev32.16 q1, q1
94; CHECK-BE-NEXT:    vcmp.i16 eq, q2, zr
95; CHECK-BE-NEXT:    vpsel q1, q4, q1
96; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
97; CHECK-BE-NEXT:    vrev64.16 q0, q1
98; CHECK-BE-NEXT:    bl ext_i16
99; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
100; CHECK-BE-NEXT:    vrev64.16 q1, q0
101; CHECK-BE-NEXT:    vpsel q1, q4, q1
102; CHECK-BE-NEXT:    vrev64.16 q0, q1
103; CHECK-BE-NEXT:    add sp, #8
104; CHECK-BE-NEXT:    vpop {d8, d9}
105; CHECK-BE-NEXT:    pop {r7, pc}
; IR under test: %c = (%src == 0) per lane; %s1 picks %a where %c is set and
; zero elsewhere; %s1 is passed to @ext_i16; the final select reuses %c to
; pick %a or the call result.
106entry:
107  %c = icmp eq <8 x i16> %src, zeroinitializer
108  %s1 = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
109  %ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %s1)
110  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
111  ret <8 x i16> %s
112}
113
; <16 x i8> case: a compare result (%c) is used by a select before a call to
; @ext_i8 and by a second select after it, so the predicate is live across
; the call. The expected output stores p0 to the stack (vstr p0) before the
; `bl` and loads it back (vldr p0) afterwards, then feeds it to the second
; vpsel. The big-endian output additionally has vrev64.8 on the incoming
; arguments, on the call operand/result, and on the return value, plus a
; vrev32.8 applied to the materialised zero vector.
; NOTE(review): presumably the spill is needed because the callee may clobber
; the predicate register - confirm against the MVE calling convention.
114define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
115; CHECK-LE-LABEL: shuffle1_v16i8:
116; CHECK-LE:       @ %bb.0: @ %entry
117; CHECK-LE-NEXT:    .save {r7, lr}
118; CHECK-LE-NEXT:    push {r7, lr}
119; CHECK-LE-NEXT:    .vsave {d8, d9}
120; CHECK-LE-NEXT:    vpush {d8, d9}
121; CHECK-LE-NEXT:    .pad #8
122; CHECK-LE-NEXT:    sub sp, #8
123; CHECK-LE-NEXT:    vcmp.i8 eq, q0, zr
124; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
125; CHECK-LE-NEXT:    vpsel q0, q1, q0
126; CHECK-LE-NEXT:    vmov q4, q1
127; CHECK-LE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
128; CHECK-LE-NEXT:    bl ext_i8
129; CHECK-LE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
130; CHECK-LE-NEXT:    vpsel q0, q4, q0
131; CHECK-LE-NEXT:    add sp, #8
132; CHECK-LE-NEXT:    vpop {d8, d9}
133; CHECK-LE-NEXT:    pop {r7, pc}
134;
135; CHECK-BE-LABEL: shuffle1_v16i8:
136; CHECK-BE:       @ %bb.0: @ %entry
137; CHECK-BE-NEXT:    .save {r7, lr}
138; CHECK-BE-NEXT:    push {r7, lr}
139; CHECK-BE-NEXT:    .vsave {d8, d9}
140; CHECK-BE-NEXT:    vpush {d8, d9}
141; CHECK-BE-NEXT:    .pad #8
142; CHECK-BE-NEXT:    sub sp, #8
143; CHECK-BE-NEXT:    vrev64.8 q4, q1
144; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
145; CHECK-BE-NEXT:    vrev64.8 q2, q0
146; CHECK-BE-NEXT:    vrev32.8 q1, q1
147; CHECK-BE-NEXT:    vcmp.i8 eq, q2, zr
148; CHECK-BE-NEXT:    vpsel q1, q4, q1
149; CHECK-BE-NEXT:    vstr p0, [sp, #4] @ 4-byte Spill
150; CHECK-BE-NEXT:    vrev64.8 q0, q1
151; CHECK-BE-NEXT:    bl ext_i8
152; CHECK-BE-NEXT:    vldr p0, [sp, #4] @ 4-byte Reload
153; CHECK-BE-NEXT:    vrev64.8 q1, q0
154; CHECK-BE-NEXT:    vpsel q1, q4, q1
155; CHECK-BE-NEXT:    vrev64.8 q0, q1
156; CHECK-BE-NEXT:    add sp, #8
157; CHECK-BE-NEXT:    vpop {d8, d9}
158; CHECK-BE-NEXT:    pop {r7, pc}
; IR under test: %c = (%src == 0) per lane; %s1 picks %a where %c is set and
; zero elsewhere; %s1 is passed to @ext_i8; the final select reuses %c to
; pick %a or the call result.
159entry:
160  %c = icmp eq <16 x i8> %src, zeroinitializer
161  %s1 = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
162  %ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %s1)
163  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext
164  ret <16 x i8> %s
165}
166