; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

; uminv on v8i8: the i8-truncated reduction result should feed cbz directly,
; with no redundant masking 'and' between the fmov and the branch.
define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp {
; CHECK-LABEL: vmin_u8x8:
; CHECK: uminv.8b        b[[REG:[0-9]+]], v0
; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
; CHECK-NOT: and
; CHECK: cbz     [[REG2]],
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind
  %tmp = trunc i32 %vminv.i to i8
  %tobool = icmp eq i8 %tmp, 0
  br i1 %tobool, label %return, label %if.then

if.then:
  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
  br label %return

return:
  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
  ret i32 %retval.0
}

; External callee invoked on the non-zero path of the cbz tests above.
declare i32 @bar(...)

; uminv on v4i16: i16-truncated reduction feeds cbz with no masking 'and'.
define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp {
; CHECK-LABEL: vmin_u4x16:
; CHECK: uminv.4h        h[[REG:[0-9]+]], v0
; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
; CHECK-NOT: and
; CHECK: cbz     [[REG2]],
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind
  %tmp = trunc i32 %vminv.i to i16
  %tobool = icmp eq i16 %tmp, 0
  br i1 %tobool, label %return, label %if.then

if.then:
  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
  br label %return

return:
  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
  ret i32 %retval.0
}

; uminv on v8i16 (128-bit source): i16-truncated reduction feeds cbz, no 'and'.
define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp {
; CHECK-LABEL: vmin_u8x16:
; CHECK: uminv.8h        h[[REG:[0-9]+]], v0
; CHECK: fmov    [[REG2:w[0-9]+]], s[[REG]]
; CHECK-NOT: and
; CHECK: cbz     [[REG2]],
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind
  %tmp = trunc i32 %vminv.i to i16
  %tobool = icmp eq i16 %tmp, 0
  br i1 %tobool, label %return, label %if.then

if.then:
  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
  br label %return

return:
  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
  ret i32 %retval.0
}

; uminv on v16i8 (128-bit source): i8-truncated reduction feeds cbz, no 'and'.
define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp {
; CHECK-LABEL: vmin_u16x8:
; CHECK: uminv.16b        b[[REG:[0-9]+]], v0
; CHECK: fmov     [[REG2:w[0-9]+]], s[[REG]]
; CHECK-NOT: and
; CHECK: cbz     [[REG2]],
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind
  %tmp = trunc i32 %vminv.i to i8
  %tobool = icmp eq i8 %tmp, 0
  br i1 %tobool, label %return, label %if.then

if.then:
  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
  br label %return

return:
  %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
  ret i32 %retval.0
}

; uminv result inserted into a lane: expect a direct vector 'ins' from the
; reduction register, with no round-trip through a GPR.
define <8 x i8> @test_vminv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
; CHECK-LABEL: test_vminv_u8_used_by_laneop:
; CHECK: uminv.8b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
  ret <8 x i8> %2
}

; v4i16 uminv feeding an insertelement: expect uminv + ins, no GPR round-trip.
define <4 x i16> @test_vminv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
; CHECK-LABEL: test_vminv_u16_used_by_laneop:
; CHECK: uminv.4h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
  ret <4 x i16> %2
}

; v2i32 reduction: two-element min lowers to a pairwise uminp instead of uminv,
; then inserts straight into the destination lane.
define <2 x i32> @test_vminv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: test_vminv_u32_used_by_laneop:
; CHECK: uminp.2s v[[REGNUM:[0-9]+]], v1, v1
; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a2)
  %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
  ret <2 x i32> %1
}

; v16i8 uminv feeding an insertelement: expect uminv + ins, no GPR round-trip.
define <16 x i8> @test_vminvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_vminvq_u8_used_by_laneop:
; CHECK: uminv.16b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
  ret <16 x i8> %2
}

; v8i16 uminv feeding an insertelement: expect uminv + ins, no GPR round-trip.
define <8 x i16> @test_vminvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_vminvq_u16_used_by_laneop:
; CHECK: uminv.8h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
  ret <8 x i16> %2
}

; v4i32 uminv feeding an insertelement: expect uminv + ins, no GPR round-trip.
define <4 x i32> @test_vminvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_vminvq_u32_used_by_laneop:
; CHECK: uminv.4s s[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a2)
  %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
  ret <4 x i32> %1
}
; Unsigned min-across-vector reduction intrinsics exercised above.
declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) nounwind readnone
