1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
4define arm_aapcs_vfpcc signext i8 @test_vminvq_s8(i8 signext %a, <16 x i8> %b) {
5; CHECK-LABEL: test_vminvq_s8:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vminv.s8 r0, q0
8; CHECK-NEXT:    sxtb r0, r0
9; CHECK-NEXT:    bx lr
10entry:
11  %0 = zext i8 %a to i32
12  %1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 0)
13  %2 = trunc i32 %1 to i8
14  ret i8 %2
15}
16
17define arm_aapcs_vfpcc signext i16 @test_vminvq_s16(i16 signext %a, <8 x i16> %b) {
18; CHECK-LABEL: test_vminvq_s16:
19; CHECK:       @ %bb.0: @ %entry
20; CHECK-NEXT:    vminv.s16 r0, q0
21; CHECK-NEXT:    sxth r0, r0
22; CHECK-NEXT:    bx lr
23entry:
24  %0 = zext i16 %a to i32
25  %1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 0)
26  %2 = trunc i32 %1 to i16
27  ret i16 %2
28}
29
30define arm_aapcs_vfpcc i32 @test_vminvq_s32(i32 %a, <4 x i32> %b) {
31; CHECK-LABEL: test_vminvq_s32:
32; CHECK:       @ %bb.0: @ %entry
33; CHECK-NEXT:    vminv.s32 r0, q0
34; CHECK-NEXT:    bx lr
35entry:
36  %0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 0)
37  ret i32 %0
38}
39
40define arm_aapcs_vfpcc zeroext i8 @test_vminvq_u8(i8 zeroext %a, <16 x i8> %b) {
41; CHECK-LABEL: test_vminvq_u8:
42; CHECK:       @ %bb.0: @ %entry
43; CHECK-NEXT:    vminv.u8 r0, q0
44; CHECK-NEXT:    uxtb r0, r0
45; CHECK-NEXT:    bx lr
46entry:
47  %0 = zext i8 %a to i32
48  %1 = tail call i32 @llvm.arm.mve.minv.v16i8(i32 %0, <16 x i8> %b, i32 1)
49  %2 = trunc i32 %1 to i8
50  ret i8 %2
51}
52
53define arm_aapcs_vfpcc zeroext i16 @test_vminvq_u16(i16 zeroext %a, <8 x i16> %b) {
54; CHECK-LABEL: test_vminvq_u16:
55; CHECK:       @ %bb.0: @ %entry
56; CHECK-NEXT:    vminv.u16 r0, q0
57; CHECK-NEXT:    uxth r0, r0
58; CHECK-NEXT:    bx lr
59entry:
60  %0 = zext i16 %a to i32
61  %1 = tail call i32 @llvm.arm.mve.minv.v8i16(i32 %0, <8 x i16> %b, i32 1)
62  %2 = trunc i32 %1 to i16
63  ret i16 %2
64}
65
66define arm_aapcs_vfpcc i32 @test_vminvq_u32(i32 %a, <4 x i32> %b) {
67; CHECK-LABEL: test_vminvq_u32:
68; CHECK:       @ %bb.0: @ %entry
69; CHECK-NEXT:    vminv.u32 r0, q0
70; CHECK-NEXT:    bx lr
71entry:
72  %0 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %a, <4 x i32> %b, i32 1)
73  ret i32 %0
74}
75
76define arm_aapcs_vfpcc signext i8 @test_vmaxvq_s8(i8 signext %a, <16 x i8> %b) {
77; CHECK-LABEL: test_vmaxvq_s8:
78; CHECK:       @ %bb.0: @ %entry
79; CHECK-NEXT:    vmaxv.s8 r0, q0
80; CHECK-NEXT:    sxtb r0, r0
81; CHECK-NEXT:    bx lr
82entry:
83  %0 = zext i8 %a to i32
84  %1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 0)
85  %2 = trunc i32 %1 to i8
86  ret i8 %2
87}
88
89define arm_aapcs_vfpcc signext i16 @test_vmaxvq_s16(i16 signext %a, <8 x i16> %b) {
90; CHECK-LABEL: test_vmaxvq_s16:
91; CHECK:       @ %bb.0: @ %entry
92; CHECK-NEXT:    vmaxv.s16 r0, q0
93; CHECK-NEXT:    sxth r0, r0
94; CHECK-NEXT:    bx lr
95entry:
96  %0 = zext i16 %a to i32
97  %1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 0)
98  %2 = trunc i32 %1 to i16
99  ret i16 %2
100}
101
102define arm_aapcs_vfpcc i32 @test_vmaxvq_s32(i32 %a, <4 x i32> %b) {
103; CHECK-LABEL: test_vmaxvq_s32:
104; CHECK:       @ %bb.0: @ %entry
105; CHECK-NEXT:    vmaxv.s32 r0, q0
106; CHECK-NEXT:    bx lr
107entry:
108  %0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 0)
109  ret i32 %0
110}
111
112define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_u8(i8 zeroext %a, <16 x i8> %b) {
113; CHECK-LABEL: test_vmaxvq_u8:
114; CHECK:       @ %bb.0: @ %entry
115; CHECK-NEXT:    vmaxv.u8 r0, q0
116; CHECK-NEXT:    uxtb r0, r0
117; CHECK-NEXT:    bx lr
118entry:
119  %0 = zext i8 %a to i32
120  %1 = tail call i32 @llvm.arm.mve.maxv.v16i8(i32 %0, <16 x i8> %b, i32 1)
121  %2 = trunc i32 %1 to i8
122  ret i8 %2
123}
124
125define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_u16(i16 zeroext %a, <8 x i16> %b) {
126; CHECK-LABEL: test_vmaxvq_u16:
127; CHECK:       @ %bb.0: @ %entry
128; CHECK-NEXT:    vmaxv.u16 r0, q0
129; CHECK-NEXT:    uxth r0, r0
130; CHECK-NEXT:    bx lr
131entry:
132  %0 = zext i16 %a to i32
133  %1 = tail call i32 @llvm.arm.mve.maxv.v8i16(i32 %0, <8 x i16> %b, i32 1)
134  %2 = trunc i32 %1 to i16
135  ret i16 %2
136}
137
138define arm_aapcs_vfpcc i32 @test_vmaxvq_u32(i32 %a, <4 x i32> %b) {
139; CHECK-LABEL: test_vmaxvq_u32:
140; CHECK:       @ %bb.0: @ %entry
141; CHECK-NEXT:    vmaxv.u32 r0, q0
142; CHECK-NEXT:    bx lr
143entry:
144  %0 = tail call i32 @llvm.arm.mve.maxv.v4i32(i32 %a, <4 x i32> %b, i32 1)
145  ret i32 %0
146}
147
148define arm_aapcs_vfpcc zeroext i8 @test_vminavq_s8(i8 zeroext %a, <16 x i8> %b) {
149; CHECK-LABEL: test_vminavq_s8:
150; CHECK:       @ %bb.0: @ %entry
151; CHECK-NEXT:    vminav.s8 r0, q0
152; CHECK-NEXT:    uxtb r0, r0
153; CHECK-NEXT:    bx lr
154entry:
155  %0 = zext i8 %a to i32
156  %1 = tail call i32 @llvm.arm.mve.minav.v16i8(i32 %0, <16 x i8> %b)
157  %2 = trunc i32 %1 to i8
158  ret i8 %2
159}
160
161define arm_aapcs_vfpcc zeroext i16 @test_vminavq_s16(i16 zeroext %a, <8 x i16> %b) {
162; CHECK-LABEL: test_vminavq_s16:
163; CHECK:       @ %bb.0: @ %entry
164; CHECK-NEXT:    vminav.s16 r0, q0
165; CHECK-NEXT:    uxth r0, r0
166; CHECK-NEXT:    bx lr
167entry:
168  %0 = zext i16 %a to i32
169  %1 = tail call i32 @llvm.arm.mve.minav.v8i16(i32 %0, <8 x i16> %b)
170  %2 = trunc i32 %1 to i16
171  ret i16 %2
172}
173
174define arm_aapcs_vfpcc i32 @test_vminavq_s32(i32 %a, <4 x i32> %b) {
175; CHECK-LABEL: test_vminavq_s32:
176; CHECK:       @ %bb.0: @ %entry
177; CHECK-NEXT:    vminav.s32 r0, q0
178; CHECK-NEXT:    bx lr
179entry:
180  %0 = tail call i32 @llvm.arm.mve.minav.v4i32(i32 %a, <4 x i32> %b)
181  ret i32 %0
182}
183
184define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_s8(i8 zeroext %a, <16 x i8> %b) {
185; CHECK-LABEL: test_vmaxavq_s8:
186; CHECK:       @ %bb.0: @ %entry
187; CHECK-NEXT:    vmaxav.s8 r0, q0
188; CHECK-NEXT:    uxtb r0, r0
189; CHECK-NEXT:    bx lr
190entry:
191  %0 = zext i8 %a to i32
192  %1 = tail call i32 @llvm.arm.mve.maxav.v16i8(i32 %0, <16 x i8> %b)
193  %2 = trunc i32 %1 to i8
194  ret i8 %2
195}
196
197define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_s16(i16 zeroext %a, <8 x i16> %b) {
198; CHECK-LABEL: test_vmaxavq_s16:
199; CHECK:       @ %bb.0: @ %entry
200; CHECK-NEXT:    vmaxav.s16 r0, q0
201; CHECK-NEXT:    uxth r0, r0
202; CHECK-NEXT:    bx lr
203entry:
204  %0 = zext i16 %a to i32
205  %1 = tail call i32 @llvm.arm.mve.maxav.v8i16(i32 %0, <8 x i16> %b)
206  %2 = trunc i32 %1 to i16
207  ret i16 %2
208}
209
210define arm_aapcs_vfpcc i32 @test_vmaxavq_s32(i32 %a, <4 x i32> %b) {
211; CHECK-LABEL: test_vmaxavq_s32:
212; CHECK:       @ %bb.0: @ %entry
213; CHECK-NEXT:    vmaxav.s32 r0, q0
214; CHECK-NEXT:    bx lr
215entry:
216  %0 = tail call i32 @llvm.arm.mve.maxav.v4i32(i32 %a, <4 x i32> %b)
217  ret i32 %0
218}
219
220define arm_aapcs_vfpcc float @test_vminnmvq_f16(float %a.coerce, <8 x half> %b) {
221; CHECK-LABEL: test_vminnmvq_f16:
222; CHECK:       @ %bb.0: @ %entry
223; CHECK-NEXT:    vmov r0, s0
224; CHECK-NEXT:    vminnmv.f16 r0, q1
225; CHECK-NEXT:    vmov s0, r0
226; CHECK-NEXT:    vmov.f16 r0, s0
227; CHECK-NEXT:    vmov s0, r0
228; CHECK-NEXT:    bx lr
229entry:
230  %0 = bitcast float %a.coerce to i32
231  %tmp.0.extract.trunc = trunc i32 %0 to i16
232  %1 = bitcast i16 %tmp.0.extract.trunc to half
233  %2 = tail call half @llvm.arm.mve.minnmv.f16.v8f16(half %1, <8 x half> %b)
234  %3 = bitcast half %2 to i16
235  %tmp2.0.insert.ext = zext i16 %3 to i32
236  %4 = bitcast i32 %tmp2.0.insert.ext to float
237  ret float %4
238}
239
240define arm_aapcs_vfpcc float @test_vminnmvq_f32(float %a, <4 x float> %b) {
241; CHECK-LABEL: test_vminnmvq_f32:
242; CHECK:       @ %bb.0: @ %entry
243; CHECK-NEXT:    vmov r0, s0
244; CHECK-NEXT:    vminnmv.f32 r0, q1
245; CHECK-NEXT:    vmov s0, r0
246; CHECK-NEXT:    bx lr
247entry:
248  %0 = tail call float @llvm.arm.mve.minnmv.f32.v4f32(float %a, <4 x float> %b)
249  ret float %0
250}
251
252define arm_aapcs_vfpcc float @test_vminnmavq_f16(float %a.coerce, <8 x half> %b) {
253; CHECK-LABEL: test_vminnmavq_f16:
254; CHECK:       @ %bb.0: @ %entry
255; CHECK-NEXT:    vmov r0, s0
256; CHECK-NEXT:    vminnmav.f16 r0, q1
257; CHECK-NEXT:    vmov s0, r0
258; CHECK-NEXT:    vmov.f16 r0, s0
259; CHECK-NEXT:    vmov s0, r0
260; CHECK-NEXT:    bx lr
261entry:
262  %0 = bitcast float %a.coerce to i32
263  %tmp.0.extract.trunc = trunc i32 %0 to i16
264  %1 = bitcast i16 %tmp.0.extract.trunc to half
265  %2 = tail call half @llvm.arm.mve.minnmav.f16.v8f16(half %1, <8 x half> %b)
266  %3 = bitcast half %2 to i16
267  %tmp2.0.insert.ext = zext i16 %3 to i32
268  %4 = bitcast i32 %tmp2.0.insert.ext to float
269  ret float %4
270}
271
272define arm_aapcs_vfpcc float @test_vminnmavq_f32(float %a, <4 x float> %b) {
273; CHECK-LABEL: test_vminnmavq_f32:
274; CHECK:       @ %bb.0: @ %entry
275; CHECK-NEXT:    vmov r0, s0
276; CHECK-NEXT:    vminnmav.f32 r0, q1
277; CHECK-NEXT:    vmov s0, r0
278; CHECK-NEXT:    bx lr
279entry:
280  %0 = tail call float @llvm.arm.mve.minnmav.f32.v4f32(float %a, <4 x float> %b)
281  ret float %0
282}
283
284define arm_aapcs_vfpcc float @test_vmaxnmvq_f16(float %a.coerce, <8 x half> %b) {
285; CHECK-LABEL: test_vmaxnmvq_f16:
286; CHECK:       @ %bb.0: @ %entry
287; CHECK-NEXT:    vmov r0, s0
288; CHECK-NEXT:    vmaxnmv.f16 r0, q1
289; CHECK-NEXT:    vmov s0, r0
290; CHECK-NEXT:    vmov.f16 r0, s0
291; CHECK-NEXT:    vmov s0, r0
292; CHECK-NEXT:    bx lr
293entry:
294  %0 = bitcast float %a.coerce to i32
295  %tmp.0.extract.trunc = trunc i32 %0 to i16
296  %1 = bitcast i16 %tmp.0.extract.trunc to half
297  %2 = tail call half @llvm.arm.mve.maxnmv.f16.v8f16(half %1, <8 x half> %b)
298  %3 = bitcast half %2 to i16
299  %tmp2.0.insert.ext = zext i16 %3 to i32
300  %4 = bitcast i32 %tmp2.0.insert.ext to float
301  ret float %4
302}
303
304define arm_aapcs_vfpcc float @test_vmaxnmvq_f32(float %a, <4 x float> %b) {
305; CHECK-LABEL: test_vmaxnmvq_f32:
306; CHECK:       @ %bb.0: @ %entry
307; CHECK-NEXT:    vmov r0, s0
308; CHECK-NEXT:    vmaxnmv.f32 r0, q1
309; CHECK-NEXT:    vmov s0, r0
310; CHECK-NEXT:    bx lr
311entry:
312  %0 = tail call float @llvm.arm.mve.maxnmv.f32.v4f32(float %a, <4 x float> %b)
313  ret float %0
314}
315
316define arm_aapcs_vfpcc float @test_vmaxnmavq_f16(float %a.coerce, <8 x half> %b) {
317; CHECK-LABEL: test_vmaxnmavq_f16:
318; CHECK:       @ %bb.0: @ %entry
319; CHECK-NEXT:    vmov r0, s0
320; CHECK-NEXT:    vmaxnmav.f16 r0, q1
321; CHECK-NEXT:    vmov s0, r0
322; CHECK-NEXT:    vmov.f16 r0, s0
323; CHECK-NEXT:    vmov s0, r0
324; CHECK-NEXT:    bx lr
325entry:
326  %0 = bitcast float %a.coerce to i32
327  %tmp.0.extract.trunc = trunc i32 %0 to i16
328  %1 = bitcast i16 %tmp.0.extract.trunc to half
329  %2 = tail call half @llvm.arm.mve.maxnmav.f16.v8f16(half %1, <8 x half> %b)
330  %3 = bitcast half %2 to i16
331  %tmp2.0.insert.ext = zext i16 %3 to i32
332  %4 = bitcast i32 %tmp2.0.insert.ext to float
333  ret float %4
334}
335
336define arm_aapcs_vfpcc float @test_vmaxnmavq_f32(float %a, <4 x float> %b) {
337; CHECK-LABEL: test_vmaxnmavq_f32:
338; CHECK:       @ %bb.0: @ %entry
339; CHECK-NEXT:    vmov r0, s0
340; CHECK-NEXT:    vmaxnmav.f32 r0, q1
341; CHECK-NEXT:    vmov s0, r0
342; CHECK-NEXT:    bx lr
343entry:
344  %0 = tail call float @llvm.arm.mve.maxnmav.f32.v4f32(float %a, <4 x float> %b)
345  ret float %0
346}
347
348define arm_aapcs_vfpcc signext i8 @test_vminvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
349; CHECK-LABEL: test_vminvq_p_s8:
350; CHECK:       @ %bb.0: @ %entry
351; CHECK-NEXT:    vmsr p0, r1
352; CHECK-NEXT:    vpst
353; CHECK-NEXT:    vminvt.s8 r0, q0
354; CHECK-NEXT:    sxtb r0, r0
355; CHECK-NEXT:    bx lr
356entry:
357  %0 = zext i8 %a to i32
358  %1 = zext i16 %p to i32
359  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
360  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
361  %4 = trunc i32 %3 to i8
362  ret i8 %4
363}
364
365define arm_aapcs_vfpcc signext i16 @test_vminvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
366; CHECK-LABEL: test_vminvq_p_s16:
367; CHECK:       @ %bb.0: @ %entry
368; CHECK-NEXT:    vmsr p0, r1
369; CHECK-NEXT:    vpst
370; CHECK-NEXT:    vminvt.s16 r0, q0
371; CHECK-NEXT:    sxth r0, r0
372; CHECK-NEXT:    bx lr
373entry:
374  %0 = zext i16 %a to i32
375  %1 = zext i16 %p to i32
376  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
377  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
378  %4 = trunc i32 %3 to i16
379  ret i16 %4
380}
381
382define arm_aapcs_vfpcc i32 @test_vminvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
383; CHECK-LABEL: test_vminvq_p_s32:
384; CHECK:       @ %bb.0: @ %entry
385; CHECK-NEXT:    vmsr p0, r1
386; CHECK-NEXT:    vpst
387; CHECK-NEXT:    vminvt.s32 r0, q0
388; CHECK-NEXT:    bx lr
389entry:
390  %0 = zext i16 %p to i32
391  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
392  %2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
393  ret i32 %2
394}
395
396define arm_aapcs_vfpcc zeroext i8 @test_vminvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
397; CHECK-LABEL: test_vminvq_p_u8:
398; CHECK:       @ %bb.0: @ %entry
399; CHECK-NEXT:    vmsr p0, r1
400; CHECK-NEXT:    vpst
401; CHECK-NEXT:    vminvt.u8 r0, q0
402; CHECK-NEXT:    uxtb r0, r0
403; CHECK-NEXT:    bx lr
404entry:
405  %0 = zext i8 %a to i32
406  %1 = zext i16 %p to i32
407  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
408  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
409  %4 = trunc i32 %3 to i8
410  ret i8 %4
411}
412
413define arm_aapcs_vfpcc zeroext i16 @test_vminvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
414; CHECK-LABEL: test_vminvq_p_u16:
415; CHECK:       @ %bb.0: @ %entry
416; CHECK-NEXT:    vmsr p0, r1
417; CHECK-NEXT:    vpst
418; CHECK-NEXT:    vminvt.u16 r0, q0
419; CHECK-NEXT:    uxth r0, r0
420; CHECK-NEXT:    bx lr
421entry:
422  %0 = zext i16 %a to i32
423  %1 = zext i16 %p to i32
424  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
425  %3 = tail call i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
426  %4 = trunc i32 %3 to i16
427  ret i16 %4
428}
429
430define arm_aapcs_vfpcc i32 @test_vminvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
431; CHECK-LABEL: test_vminvq_p_u32:
432; CHECK:       @ %bb.0: @ %entry
433; CHECK-NEXT:    vmsr p0, r1
434; CHECK-NEXT:    vpst
435; CHECK-NEXT:    vminvt.u32 r0, q0
436; CHECK-NEXT:    bx lr
437entry:
438  %0 = zext i16 %p to i32
439  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
440  %2 = tail call i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
441  ret i32 %2
442}
443
444define arm_aapcs_vfpcc signext i8 @test_vmaxvq_p_s8(i8 signext %a, <16 x i8> %b, i16 zeroext %p) {
445; CHECK-LABEL: test_vmaxvq_p_s8:
446; CHECK:       @ %bb.0: @ %entry
447; CHECK-NEXT:    vmsr p0, r1
448; CHECK-NEXT:    vpst
449; CHECK-NEXT:    vmaxvt.s8 r0, q0
450; CHECK-NEXT:    sxtb r0, r0
451; CHECK-NEXT:    bx lr
452entry:
453  %0 = zext i8 %a to i32
454  %1 = zext i16 %p to i32
455  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
456  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 0, <16 x i1> %2)
457  %4 = trunc i32 %3 to i8
458  ret i8 %4
459}
460
461define arm_aapcs_vfpcc signext i16 @test_vmaxvq_p_s16(i16 signext %a, <8 x i16> %b, i16 zeroext %p) {
462; CHECK-LABEL: test_vmaxvq_p_s16:
463; CHECK:       @ %bb.0: @ %entry
464; CHECK-NEXT:    vmsr p0, r1
465; CHECK-NEXT:    vpst
466; CHECK-NEXT:    vmaxvt.s16 r0, q0
467; CHECK-NEXT:    sxth r0, r0
468; CHECK-NEXT:    bx lr
469entry:
470  %0 = zext i16 %a to i32
471  %1 = zext i16 %p to i32
472  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
473  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 0, <8 x i1> %2)
474  %4 = trunc i32 %3 to i16
475  ret i16 %4
476}
477
478define arm_aapcs_vfpcc i32 @test_vmaxvq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
479; CHECK-LABEL: test_vmaxvq_p_s32:
480; CHECK:       @ %bb.0: @ %entry
481; CHECK-NEXT:    vmsr p0, r1
482; CHECK-NEXT:    vpst
483; CHECK-NEXT:    vmaxvt.s32 r0, q0
484; CHECK-NEXT:    bx lr
485entry:
486  %0 = zext i16 %p to i32
487  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
488  %2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 0, <4 x i1> %1)
489  ret i32 %2
490}
491
492define arm_aapcs_vfpcc zeroext i8 @test_vmaxvq_p_u8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
493; CHECK-LABEL: test_vmaxvq_p_u8:
494; CHECK:       @ %bb.0: @ %entry
495; CHECK-NEXT:    vmsr p0, r1
496; CHECK-NEXT:    vpst
497; CHECK-NEXT:    vmaxvt.u8 r0, q0
498; CHECK-NEXT:    uxtb r0, r0
499; CHECK-NEXT:    bx lr
500entry:
501  %0 = zext i8 %a to i32
502  %1 = zext i16 %p to i32
503  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
504  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, i32 1, <16 x i1> %2)
505  %4 = trunc i32 %3 to i8
506  ret i8 %4
507}
508
509define arm_aapcs_vfpcc zeroext i16 @test_vmaxvq_p_u16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
510; CHECK-LABEL: test_vmaxvq_p_u16:
511; CHECK:       @ %bb.0: @ %entry
512; CHECK-NEXT:    vmsr p0, r1
513; CHECK-NEXT:    vpst
514; CHECK-NEXT:    vmaxvt.u16 r0, q0
515; CHECK-NEXT:    uxth r0, r0
516; CHECK-NEXT:    bx lr
517entry:
518  %0 = zext i16 %a to i32
519  %1 = zext i16 %p to i32
520  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
521  %3 = tail call i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, i32 1, <8 x i1> %2)
522  %4 = trunc i32 %3 to i16
523  ret i16 %4
524}
525
526define arm_aapcs_vfpcc i32 @test_vmaxvq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
527; CHECK-LABEL: test_vmaxvq_p_u32:
528; CHECK:       @ %bb.0: @ %entry
529; CHECK-NEXT:    vmsr p0, r1
530; CHECK-NEXT:    vpst
531; CHECK-NEXT:    vmaxvt.u32 r0, q0
532; CHECK-NEXT:    bx lr
533entry:
534  %0 = zext i16 %p to i32
535  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
536  %2 = tail call i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, i32 1, <4 x i1> %1)
537  ret i32 %2
538}
539
540define arm_aapcs_vfpcc zeroext i8 @test_vminavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
541; CHECK-LABEL: test_vminavq_p_s8:
542; CHECK:       @ %bb.0: @ %entry
543; CHECK-NEXT:    vmsr p0, r1
544; CHECK-NEXT:    vpst
545; CHECK-NEXT:    vminavt.s8 r0, q0
546; CHECK-NEXT:    uxtb r0, r0
547; CHECK-NEXT:    bx lr
548entry:
549  %0 = zext i8 %a to i32
550  %1 = zext i16 %p to i32
551  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
552  %3 = tail call i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
553  %4 = trunc i32 %3 to i8
554  ret i8 %4
555}
556
557define arm_aapcs_vfpcc zeroext i16 @test_vminavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
558; CHECK-LABEL: test_vminavq_p_s16:
559; CHECK:       @ %bb.0: @ %entry
560; CHECK-NEXT:    vmsr p0, r1
561; CHECK-NEXT:    vpst
562; CHECK-NEXT:    vminavt.s16 r0, q0
563; CHECK-NEXT:    uxth r0, r0
564; CHECK-NEXT:    bx lr
565entry:
566  %0 = zext i16 %a to i32
567  %1 = zext i16 %p to i32
568  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
569  %3 = tail call i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
570  %4 = trunc i32 %3 to i16
571  ret i16 %4
572}
573
574define arm_aapcs_vfpcc i32 @test_vminavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
575; CHECK-LABEL: test_vminavq_p_s32:
576; CHECK:       @ %bb.0: @ %entry
577; CHECK-NEXT:    vmsr p0, r1
578; CHECK-NEXT:    vpst
579; CHECK-NEXT:    vminavt.s32 r0, q0
580; CHECK-NEXT:    bx lr
581entry:
582  %0 = zext i16 %p to i32
583  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
584  %2 = tail call i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
585  ret i32 %2
586}
587
588define arm_aapcs_vfpcc zeroext i8 @test_vmaxavq_p_s8(i8 zeroext %a, <16 x i8> %b, i16 zeroext %p) {
589; CHECK-LABEL: test_vmaxavq_p_s8:
590; CHECK:       @ %bb.0: @ %entry
591; CHECK-NEXT:    vmsr p0, r1
592; CHECK-NEXT:    vpst
593; CHECK-NEXT:    vmaxavt.s8 r0, q0
594; CHECK-NEXT:    uxtb r0, r0
595; CHECK-NEXT:    bx lr
596entry:
597  %0 = zext i8 %a to i32
598  %1 = zext i16 %p to i32
599  %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
600  %3 = tail call i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32 %0, <16 x i8> %b, <16 x i1> %2)
601  %4 = trunc i32 %3 to i8
602  ret i8 %4
603}
604
605define arm_aapcs_vfpcc zeroext i16 @test_vmaxavq_p_s16(i16 zeroext %a, <8 x i16> %b, i16 zeroext %p) {
606; CHECK-LABEL: test_vmaxavq_p_s16:
607; CHECK:       @ %bb.0: @ %entry
608; CHECK-NEXT:    vmsr p0, r1
609; CHECK-NEXT:    vpst
610; CHECK-NEXT:    vmaxavt.s16 r0, q0
611; CHECK-NEXT:    uxth r0, r0
612; CHECK-NEXT:    bx lr
613entry:
614  %0 = zext i16 %a to i32
615  %1 = zext i16 %p to i32
616  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
617  %3 = tail call i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32 %0, <8 x i16> %b, <8 x i1> %2)
618  %4 = trunc i32 %3 to i16
619  ret i16 %4
620}
621
622define arm_aapcs_vfpcc i32 @test_vmaxavq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
623; CHECK-LABEL: test_vmaxavq_p_s32:
624; CHECK:       @ %bb.0: @ %entry
625; CHECK-NEXT:    vmsr p0, r1
626; CHECK-NEXT:    vpst
627; CHECK-NEXT:    vmaxavt.s32 r0, q0
628; CHECK-NEXT:    bx lr
629entry:
630  %0 = zext i16 %p to i32
631  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
632  %2 = tail call i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32 %a, <4 x i32> %b, <4 x i1> %1)
633  ret i32 %2
634}
635
636define arm_aapcs_vfpcc float @test_vminnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
637; CHECK-LABEL: test_vminnmvq_p_f16:
638; CHECK:       @ %bb.0: @ %entry
639; CHECK-NEXT:    vmov r1, s0
640; CHECK-NEXT:    vmsr p0, r0
641; CHECK-NEXT:    vpst
642; CHECK-NEXT:    vminnmvt.f16 r1, q1
643; CHECK-NEXT:    vmov s0, r1
644; CHECK-NEXT:    vmov.f16 r0, s0
645; CHECK-NEXT:    vmov s0, r0
646; CHECK-NEXT:    bx lr
647entry:
648  %0 = bitcast float %a.coerce to i32
649  %tmp.0.extract.trunc = trunc i32 %0 to i16
650  %1 = bitcast i16 %tmp.0.extract.trunc to half
651  %2 = zext i16 %p to i32
652  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
653  %4 = tail call half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
654  %5 = bitcast half %4 to i16
655  %tmp2.0.insert.ext = zext i16 %5 to i32
656  %6 = bitcast i32 %tmp2.0.insert.ext to float
657  ret float %6
658}
659
660define arm_aapcs_vfpcc float @test_vminnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
661; CHECK-LABEL: test_vminnmvq_p_f32:
662; CHECK:       @ %bb.0: @ %entry
663; CHECK-NEXT:    vmsr p0, r0
664; CHECK-NEXT:    vmov r0, s0
665; CHECK-NEXT:    vpst
666; CHECK-NEXT:    vminnmvt.f32 r0, q1
667; CHECK-NEXT:    vmov s0, r0
668; CHECK-NEXT:    bx lr
669entry:
670  %0 = zext i16 %p to i32
671  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
672  %2 = tail call float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
673  ret float %2
674}
675
676define arm_aapcs_vfpcc float @test_vminnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
677; CHECK-LABEL: test_vminnmavq_p_f16:
678; CHECK:       @ %bb.0: @ %entry
679; CHECK-NEXT:    vmov r1, s0
680; CHECK-NEXT:    vmsr p0, r0
681; CHECK-NEXT:    vpst
682; CHECK-NEXT:    vminnmavt.f16 r1, q1
683; CHECK-NEXT:    vmov s0, r1
684; CHECK-NEXT:    vmov.f16 r0, s0
685; CHECK-NEXT:    vmov s0, r0
686; CHECK-NEXT:    bx lr
687entry:
688  %0 = bitcast float %a.coerce to i32
689  %tmp.0.extract.trunc = trunc i32 %0 to i16
690  %1 = bitcast i16 %tmp.0.extract.trunc to half
691  %2 = zext i16 %p to i32
692  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
693  %4 = tail call half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
694  %5 = bitcast half %4 to i16
695  %tmp2.0.insert.ext = zext i16 %5 to i32
696  %6 = bitcast i32 %tmp2.0.insert.ext to float
697  ret float %6
698}
699
700define arm_aapcs_vfpcc float @test_vminnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
701; CHECK-LABEL: test_vminnmavq_p_f32:
702; CHECK:       @ %bb.0: @ %entry
703; CHECK-NEXT:    vmsr p0, r0
704; CHECK-NEXT:    vmov r0, s0
705; CHECK-NEXT:    vpst
706; CHECK-NEXT:    vminnmavt.f32 r0, q1
707; CHECK-NEXT:    vmov s0, r0
708; CHECK-NEXT:    bx lr
709entry:
710  %0 = zext i16 %p to i32
711  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
712  %2 = tail call float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
713  ret float %2
714}
715
716define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
717; CHECK-LABEL: test_vmaxnmvq_p_f16:
718; CHECK:       @ %bb.0: @ %entry
719; CHECK-NEXT:    vmov r1, s0
720; CHECK-NEXT:    vmsr p0, r0
721; CHECK-NEXT:    vpst
722; CHECK-NEXT:    vmaxnmvt.f16 r1, q1
723; CHECK-NEXT:    vmov s0, r1
724; CHECK-NEXT:    vmov.f16 r0, s0
725; CHECK-NEXT:    vmov s0, r0
726; CHECK-NEXT:    bx lr
727entry:
728  %0 = bitcast float %a.coerce to i32
729  %tmp.0.extract.trunc = trunc i32 %0 to i16
730  %1 = bitcast i16 %tmp.0.extract.trunc to half
731  %2 = zext i16 %p to i32
732  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
733  %4 = tail call half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
734  %5 = bitcast half %4 to i16
735  %tmp2.0.insert.ext = zext i16 %5 to i32
736  %6 = bitcast i32 %tmp2.0.insert.ext to float
737  ret float %6
738}
739
740define arm_aapcs_vfpcc float @test_vmaxnmvq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
741; CHECK-LABEL: test_vmaxnmvq_p_f32:
742; CHECK:       @ %bb.0: @ %entry
743; CHECK-NEXT:    vmsr p0, r0
744; CHECK-NEXT:    vmov r0, s0
745; CHECK-NEXT:    vpst
746; CHECK-NEXT:    vmaxnmvt.f32 r0, q1
747; CHECK-NEXT:    vmov s0, r0
748; CHECK-NEXT:    bx lr
749entry:
750  %0 = zext i16 %p to i32
751  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
752  %2 = tail call float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
753  ret float %2
754}
755
756define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f16(float %a.coerce, <8 x half> %b, i16 zeroext %p) {
757; CHECK-LABEL: test_vmaxnmavq_p_f16:
758; CHECK:       @ %bb.0: @ %entry
759; CHECK-NEXT:    vmov r1, s0
760; CHECK-NEXT:    vmsr p0, r0
761; CHECK-NEXT:    vpst
762; CHECK-NEXT:    vmaxnmavt.f16 r1, q1
763; CHECK-NEXT:    vmov s0, r1
764; CHECK-NEXT:    vmov.f16 r0, s0
765; CHECK-NEXT:    vmov s0, r0
766; CHECK-NEXT:    bx lr
767entry:
768  %0 = bitcast float %a.coerce to i32
769  %tmp.0.extract.trunc = trunc i32 %0 to i16
770  %1 = bitcast i16 %tmp.0.extract.trunc to half
771  %2 = zext i16 %p to i32
772  %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
773  %4 = tail call half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half %1, <8 x half> %b, <8 x i1> %3)
774  %5 = bitcast half %4 to i16
775  %tmp2.0.insert.ext = zext i16 %5 to i32
776  %6 = bitcast i32 %tmp2.0.insert.ext to float
777  ret float %6
778}
779
780define arm_aapcs_vfpcc float @test_vmaxnmavq_p_f32(float %a, <4 x float> %b, i16 zeroext %p) {
781; CHECK-LABEL: test_vmaxnmavq_p_f32:
782; CHECK:       @ %bb.0: @ %entry
783; CHECK-NEXT:    vmsr p0, r0
784; CHECK-NEXT:    vmov r0, s0
785; CHECK-NEXT:    vpst
786; CHECK-NEXT:    vmaxnmavt.f32 r0, q1
787; CHECK-NEXT:    vmov s0, r0
788; CHECK-NEXT:    bx lr
789entry:
790  %0 = zext i16 %p to i32
791  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
792  %2 = tail call float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float %a, <4 x float> %b, <4 x i1> %1)
793  ret float %2
794}
795
796declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
797declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
798declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
799
800declare i32 @llvm.arm.mve.minv.v16i8(i32, <16 x i8>, i32)
801declare i32 @llvm.arm.mve.minv.v8i16(i32, <8 x i16>, i32)
802declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32)
803declare i32 @llvm.arm.mve.maxv.v16i8(i32, <16 x i8>, i32)
804declare i32 @llvm.arm.mve.maxv.v8i16(i32, <8 x i16>, i32)
805declare i32 @llvm.arm.mve.maxv.v4i32(i32, <4 x i32>, i32)
806declare i32 @llvm.arm.mve.minav.v16i8(i32, <16 x i8>)
807declare i32 @llvm.arm.mve.minav.v8i16(i32, <8 x i16>)
808declare i32 @llvm.arm.mve.minav.v4i32(i32, <4 x i32>)
809declare i32 @llvm.arm.mve.maxav.v16i8(i32, <16 x i8>)
810declare i32 @llvm.arm.mve.maxav.v8i16(i32, <8 x i16>)
811declare i32 @llvm.arm.mve.maxav.v4i32(i32, <4 x i32>)
812declare i32 @llvm.arm.mve.minv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
813declare i32 @llvm.arm.mve.minv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
814declare i32 @llvm.arm.mve.minv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
815declare i32 @llvm.arm.mve.maxv.predicated.v16i8.v16i1(i32, <16 x i8>, i32, <16 x i1>)
816declare i32 @llvm.arm.mve.maxv.predicated.v8i16.v8i1(i32, <8 x i16>, i32, <8 x i1>)
817declare i32 @llvm.arm.mve.maxv.predicated.v4i32.v4i1(i32, <4 x i32>, i32, <4 x i1>)
818declare i32 @llvm.arm.mve.minav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
819declare i32 @llvm.arm.mve.minav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
820declare i32 @llvm.arm.mve.minav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)
821declare i32 @llvm.arm.mve.maxav.predicated.v16i8.v16i1(i32, <16 x i8>, <16 x i1>)
822declare i32 @llvm.arm.mve.maxav.predicated.v8i16.v8i1(i32, <8 x i16>, <8 x i1>)
823declare i32 @llvm.arm.mve.maxav.predicated.v4i32.v4i1(i32, <4 x i32>, <4 x i1>)
824
825declare half @llvm.arm.mve.minnmv.f16.v8f16(half, <8 x half>)
826declare half @llvm.arm.mve.minnmav.f16.v8f16(half, <8 x half>)
827declare half @llvm.arm.mve.maxnmv.f16.v8f16(half, <8 x half>)
828declare half @llvm.arm.mve.maxnmav.f16.v8f16(half, <8 x half>)
829declare half @llvm.arm.mve.minnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
830declare half @llvm.arm.mve.minnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
831declare half @llvm.arm.mve.maxnmv.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
832declare half @llvm.arm.mve.maxnmav.predicated.f16.v8f16.v8i1(half, <8 x half>, <8 x i1>)
833
834declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>)
835declare float @llvm.arm.mve.minnmav.f32.v4f32(float, <4 x float>)
836declare float @llvm.arm.mve.maxnmv.f32.v4f32(float, <4 x float>)
837declare float @llvm.arm.mve.maxnmav.f32.v4f32(float, <4 x float>)
838declare float @llvm.arm.mve.minnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
839declare float @llvm.arm.mve.minnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
840declare float @llvm.arm.mve.maxnmv.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
841declare float @llvm.arm.mve.maxnmav.predicated.f32.v4f32.v4i1(float, <4 x float>, <4 x i1>)
842