1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck --check-prefix=LE %s
3; RUN: llc -mtriple=thumbebv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck --check-prefix=BE %s
4
; vmovlbq_s8: sign-extend the even-indexed ("bottom") i8 lanes of %a to i16.
; LE should fold the shuffle+sext into a single vmovlb.s8; BE additionally
; needs vrev64 reversals to convert between the big-endian ABI register
; layout and MVE lane order.
define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_s8(<16 x i8> %a) {
; LE-LABEL: test_vmovlbq_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.s8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlb.s8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 0,2,...,14: the bottom byte of each 16-bit pair.
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %1 = sext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}
22
; vmovlbq_s16: sign-extend the even-indexed ("bottom") i16 lanes of %a to
; i32. LE should fold the shuffle+sext into a single vmovlb.s16; BE wraps it
; in vrev64 reversals for the big-endian ABI register layout.
define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_s16(<8 x i16> %a) {
; LE-LABEL: test_vmovlbq_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.s16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlb.s16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 0,2,4,6: the bottom half of each 32-bit pair.
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %1 = sext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}
40
; vmovlbq_u8: zero-extend the even-indexed ("bottom") i8 lanes of %a to i16.
; Same shape as the s8 test, but the zext must select the .u8 form.
define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_u8(<16 x i8> %a) {
; LE-LABEL: test_vmovlbq_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.u8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlb.u8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 0,2,...,14: the bottom byte of each 16-bit pair.
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %1 = zext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}
58
; vmovlbq_u16: zero-extend the even-indexed ("bottom") i16 lanes of %a to
; i32. Same shape as the s16 test, but the zext must select the .u16 form.
define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_u16(<8 x i16> %a) {
; LE-LABEL: test_vmovlbq_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlb.u16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlb.u16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 0,2,4,6: the bottom half of each 32-bit pair.
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %1 = zext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}
76
; vmovltq_s8: sign-extend the odd-indexed ("top") i8 lanes of %a to i16.
; The odd-lane shuffle mask distinguishes this from vmovlb and must select
; the vmovlt.s8 instruction.
define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_s8(<16 x i8> %a) {
; LE-LABEL: test_vmovltq_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.s8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlt.s8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 1,3,...,15: the top byte of each 16-bit pair.
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %1 = sext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}
94
; vmovltq_s16: sign-extend the odd-indexed ("top") i16 lanes of %a to i32,
; selecting vmovlt.s16.
define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_s16(<8 x i16> %a) {
; LE-LABEL: test_vmovltq_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.s16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlt.s16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 1,3,5,7: the top half of each 32-bit pair.
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %1 = sext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}
112
; vmovltq_u8: zero-extend the odd-indexed ("top") i8 lanes of %a to i16,
; selecting vmovlt.u8.
define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_u8(<16 x i8> %a) {
; LE-LABEL: test_vmovltq_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.u8 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.8 q1, q0
; BE-NEXT:    vmovlt.u8 q1, q1
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 1,3,...,15: the top byte of each 16-bit pair.
  %0 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %1 = zext <8 x i8> %0 to <8 x i16>
  ret <8 x i16> %1
}
130
; vmovltq_u16: zero-extend the odd-indexed ("top") i16 lanes of %a to i32,
; selecting vmovlt.u16.
define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_u16(<8 x i16> %a) {
; LE-LABEL: test_vmovltq_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmovlt.u16 q0, q0
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q1, q0
; BE-NEXT:    vmovlt.u16 q1, q1
; BE-NEXT:    vrev64.32 q0, q1
; BE-NEXT:    bx lr
entry:
  ; Select lanes 1,3,5,7: the top half of each 32-bit pair.
  %0 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %1 = zext <4 x i16> %0 to <4 x i32>
  ret <4 x i32> %1
}
148
; Predicated (merging) variant: lanes whose predicate bit is clear keep the
; corresponding lane of %inactive, so the widening runs under a VPT block
; (vpst + vmovlbt). The i32 flags on the intrinsic appear to be
; (unsigned, top) — here 0,0 = signed/bottom -> vmovlbt.s8; confirm against
; the intrinsic's definition.
define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.s8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.s8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
172
; Predicated vmovlbq_s16: merge-widen the bottom i16 lanes under %p,
; inactive lanes taken from %inactive. Flags 0,0 (signed, bottom)
; -> vmovlbt.s16 in a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.s16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.s16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
196
; Predicated vmovlbq_u8: merge-widen the bottom i8 lanes under %p,
; inactive lanes taken from %inactive. Flags 1,0 (unsigned, bottom)
; -> vmovlbt.u8 in a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.u8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.u8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
220
; Predicated vmovlbq_u16: merge-widen the bottom i16 lanes under %p,
; inactive lanes taken from %inactive. Flags 1,0 (unsigned, bottom)
; -> vmovlbt.u16 in a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovlbq_m_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovlbt.u16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovlbq_m_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovlbt.u16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
244
; Predicated vmovltq_s8: merge-widen the top i8 lanes under %p,
; inactive lanes taken from %inactive. Flags 0,1 (signed, top)
; -> vmovltt.s8 in a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_s8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.s8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_s8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.s8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
268
; Predicated vmovltq_s16: merge-widen the top i16 lanes under %p,
; inactive lanes taken from %inactive. Flags 0,1 (signed, top)
; -> vmovltt.s16 in a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_s16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.s16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_s16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.s16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
292
; Predicated vmovltq_u8: merge-widen the top i8 lanes under %p,
; inactive lanes taken from %inactive. Flags 1,1 (unsigned, top)
; -> vmovltt.u8 in a VPT block.
define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_u8:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.u8 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_u8:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.16 q2, q0
; BE-NEXT:    vrev64.8 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.u8 q2, q0
; BE-NEXT:    vrev64.16 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}
316
; Predicated vmovltq_u16: merge-widen the top i16 lanes under %p,
; inactive lanes taken from %inactive. Flags 1,1 (unsigned, top)
; -> vmovltt.u16 in a VPT block.
define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; LE-LABEL: test_vmovltq_m_u16:
; LE:       @ %bb.0: @ %entry
; LE-NEXT:    vmsr p0, r0
; LE-NEXT:    vpst
; LE-NEXT:    vmovltt.u16 q0, q1
; LE-NEXT:    bx lr
;
; BE-LABEL: test_vmovltq_m_u16:
; BE:       @ %bb.0: @ %entry
; BE-NEXT:    vrev64.32 q2, q0
; BE-NEXT:    vrev64.16 q0, q1
; BE-NEXT:    vmsr p0, r0
; BE-NEXT:    vpst
; BE-NEXT:    vmovltt.u16 q2, q0
; BE-NEXT:    vrev64.32 q0, q2
; BE-NEXT:    bx lr
entry:
  ; Expand the scalar predicate mask %p into a vector predicate.
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}
340
341declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
342declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
343declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>)
344declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>)
345