; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=0 round=0 subtract=0:
; must lower to exactly one `vqdmladh.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmladhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=0 round=0 subtract=0:
; must lower to exactly one `vqdmladh.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmladhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=0 round=0 subtract=0:
; must lower to exactly one `vqdmladh.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmladhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=1 round=0 subtract=0:
; must lower to exactly one `vqdmladhx.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmladhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=1 round=0 subtract=0:
; must lower to exactly one `vqdmladhx.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmladhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=1 round=0 subtract=0:
; must lower to exactly one `vqdmladhx.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmladhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmladhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=0 round=0 subtract=1:
; must lower to exactly one `vqdmlsdh.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmlsdhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=0 round=0 subtract=1:
; must lower to exactly one `vqdmlsdh.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmlsdhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=0 round=0 subtract=1:
; must lower to exactly one `vqdmlsdh.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmlsdhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=1 round=0 subtract=1:
; must lower to exactly one `vqdmlsdhx.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqdmlsdhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=1 round=0 subtract=1:
; must lower to exactly one `vqdmlsdhx.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqdmlsdhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=1 round=0 subtract=1:
; must lower to exactly one `vqdmlsdhx.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqdmlsdhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqdmlsdhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=0 round=1 subtract=0:
; must lower to exactly one `vqrdmladh.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmladhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=0 round=1 subtract=0:
; must lower to exactly one `vqrdmladh.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmladhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=0 round=1 subtract=0:
; must lower to exactly one `vqrdmladh.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmladhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=1 round=1 subtract=0:
; must lower to exactly one `vqrdmladhx.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmladhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=1 round=1 subtract=0:
; must lower to exactly one `vqrdmladhx.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmladhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=1 round=1 subtract=0:
; must lower to exactly one `vqrdmladhx.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmladhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmladhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=0 round=1 subtract=1:
; must lower to exactly one `vqrdmlsdh.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmlsdhq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdh.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=0 round=1 subtract=1:
; must lower to exactly one `vqrdmlsdh.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmlsdhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdh.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=0 round=1 subtract=1:
; must lower to exactly one `vqrdmlsdh.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmlsdhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdh.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1)
  ret <4 x i32> %0
}

; vqdmlad intrinsic, <16 x i8> lanes, flags exchange=1 round=1 subtract=1:
; must lower to exactly one `vqrdmlsdhx.s8 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhxq_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vqrdmlsdhxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdhx.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}

; vqdmlad intrinsic, <8 x i16> lanes, flags exchange=1 round=1 subtract=1:
; must lower to exactly one `vqrdmlsdhx.s16 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhxq_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrdmlsdhxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdhx.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}

; vqdmlad intrinsic, <4 x i32> lanes, flags exchange=1 round=1 subtract=1:
; must lower to exactly one `vqrdmlsdhx.s32 q0, q1, q2` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhxq_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrdmlsdhxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrdmlsdhx.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1)
  ret <4 x i32> %0
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=0 round=0 subtract=0.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmladht.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=0 round=0 subtract=0.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmladht.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=0 round=0 subtract=0.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmladht.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=1 round=0 subtract=0.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmladhxt.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmladhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=1 round=0 subtract=0.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmladhxt.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmladhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=1 round=0 subtract=0.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmladhxt.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmladhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmladhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmladhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=0 round=0 subtract=1.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmlsdht.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=0 round=0 subtract=1.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmlsdht.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=0 round=0 subtract=1.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmlsdht.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=1 round=0 subtract=1.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmlsdhxt.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqdmlsdhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=1 round=0 subtract=1.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmlsdhxt.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqdmlsdhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=1 round=0 subtract=1.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqdmlsdhxt.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsdhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqdmlsdhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqdmlsdhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=0 round=1 subtract=0.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmladht.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=0 round=1 subtract=0.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmladht.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=0 round=1 subtract=0.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmladht.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=1 round=1 subtract=0.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmladhxt.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmladhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=1 round=1 subtract=0.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmladhxt.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmladhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=1 round=1 subtract=0.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmladhxt.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmladhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmladhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmladhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=0 round=1 subtract=1.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmlsdht.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdht.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=0 round=1 subtract=1.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmlsdht.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdht.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=0 round=1 subtract=1.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmlsdht.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdht.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicated vqdmlad, <16 x i8> lanes, flags exchange=1 round=1 subtract=1.
; %p is zero-extended and converted to a <16 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmlsdhxt.s8.
define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlsdhxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhxq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdhxt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

; Predicated vqdmlad, <8 x i16> lanes, flags exchange=1 round=1 subtract=1.
; %p is zero-extended and converted to a <8 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmlsdhxt.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlsdhxq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhxq_m_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdhxt.s16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

; Predicated vqdmlad, <4 x i32> lanes, flags exchange=1 round=1 subtract=1.
; %p is zero-extended and converted to a <4 x i1> mask; the expected code
; loads it into p0 (vmsr), opens a VPT block (vpst) and emits vqrdmlsdhxt.s32.
define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlsdhxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrdmlsdhxq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrdmlsdhxt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

; Predicate conversion: builds an <N x i1> lane mask from an i32 value
; (the tests above pass a zero-extended i16 predicate).
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

; vqdmlad family. Operands: three vectors (called %inactive, %a, %b by the
; tests) followed by three i32 immediates. Going by the expected mnemonics in
; this file these act as exchange ("x" suffix), round ("r" in "vqr...") and
; subtract ("mlsdh" instead of "mladh") flags. The predicated variants take an
; extra trailing <N x i1> mask.
declare <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, i32, i32, i32)
declare <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i32, i32, i32)
declare <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32>, <4 x i32>, <4 x i32>, i32, i32, i32)
declare <16 x i8> @llvm.arm.mve.vqdmlad.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i8>, i32, i32, i32, <16 x i1>)
declare <8 x i16> @llvm.arm.mve.vqdmlad.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i16>, i32, i32, i32, <8 x i1>)
declare <4 x i32> @llvm.arm.mve.vqdmlad.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i32>, i32, i32, i32, <4 x i1>)