; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc i32 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vaddvq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vaddvq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vaddvq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vaddvq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s8(i32 %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddvaq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s16(i32 %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddvaq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddvaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u8(i32 %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddvaq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u16(i32 %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddvaq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddvaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %a, i32 0, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s8(i32 %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %b, i32 0, <16 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s16(i32 %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %b, i32 0, <8 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u8(i32 %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %b, i32 1, <16 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u16(i32 %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %b, i32 1, <8 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %b, i32 1, <4 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddlvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlv.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %a, i32 0)
  ret i64 %0
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddlvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlv.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %a, i32 1)
  ret i64 %0
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_s32(i64 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddlvaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlva.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %b, i32 0)
  %1 = add i64 %0, %a
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_u32(i64 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddlvaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlva.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %b, i32 1)
  %1 = add i64 %0, %a
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_p_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1)
  ret i64 %2
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_p_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1)
  ret i64 %2
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_p_s32(i64 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvat.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = add i64 %2, %a
  ret i64 %3
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_p_u32(i64 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvat.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %b, i32 1, <4 x i1> %1)
  %3 = add i64 %2, %a
  ret i64 %3
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare i32 @llvm.arm.mve.addv.v16i8(<16 x i8>, i32)
declare i32 @llvm.arm.mve.addv.v8i16(<8 x i16>, i32)
declare i32 @llvm.arm.mve.addv.v4i32(<4 x i32>, i32)
declare i64 @llvm.arm.mve.addlv.v4i32(<4 x i32>, i32)

declare i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>)
declare i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>)
declare i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)