1; RUN: llc < %s -mtriple=aarch64-linux--gnu -aarch64-neon-syntax=generic | FileCheck %s
2
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4
5declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
6declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
7declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
8declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
9declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
10declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
11
12declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
13declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
14declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
15declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
16declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
17declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
18
19declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
20declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
21
; smax_B: signed-max reduction of a <16 x i8> vector loaded from %arr.
; Expected codegen: `smaxv` with a b-register destination and a .16b source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: smax_B:
; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
  %vec = load <16 x i8>, <16 x i8>* %arr
  %max = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %vec)
  ret i8 %max
}
29
; smax_H: signed-max reduction of a <8 x i16> vector loaded from %arr.
; Expected codegen: `smaxv` with an h-register destination and a .8h source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: smax_H:
; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
  %vec = load <8 x i16>, <8 x i16>* %arr
  %max = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %vec)
  ret i16 %max
}
37
; smax_S: signed-max reduction of a <4 x i32> vector loaded from %arr.
; Expected codegen: `smaxv` with an s-register destination and a .4s source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: smax_S:
; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smax_S(<4 x i32>* nocapture readonly %arr) {
  %vec = load <4 x i32>, <4 x i32>* %arr
  %max = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %vec)
  ret i32 %max
}
45
; umax_B: unsigned-max reduction of a <16 x i8> vector loaded from %arr.
; Expected codegen: `umaxv` with a b-register destination and a .16b source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: umax_B:
; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
  %vec = load <16 x i8>, <16 x i8>* %arr
  %max = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %vec)
  ret i8 %max
}
53
; umax_H: unsigned-max reduction of a <8 x i16> vector loaded from %arr.
; Expected codegen: `umaxv` with an h-register destination and a .8h source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: umax_H:
; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
  %vec = load <8 x i16>, <8 x i16>* %arr
  %max = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %vec)
  ret i16 %max
}
61
; umax_S: unsigned-max reduction of a <4 x i32> vector loaded from %arr.
; Expected codegen: `umaxv` with an s-register destination and a .4s source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: umax_S:
; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
  %vec = load <4 x i32>, <4 x i32>* %arr
  %max = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %vec)
  ret i32 %max
}
69
; smin_B: signed-min reduction of a <16 x i8> vector loaded from %arr.
; Expected codegen: `sminv` with a b-register destination and a .16b source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: smin_B:
; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
  %vec = load <16 x i8>, <16 x i8>* %arr
  %min = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %vec)
  ret i8 %min
}
77
; smin_H: signed-min reduction of a <8 x i16> vector loaded from %arr.
; Expected codegen: `sminv` with an h-register destination and a .8h source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: smin_H:
; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
  %vec = load <8 x i16>, <8 x i16>* %arr
  %min = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %vec)
  ret i16 %min
}
85
; smin_S: signed-min reduction of a <4 x i32> vector loaded from %arr.
; Expected codegen: `sminv` with an s-register destination and a .4s source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: smin_S:
; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
  %vec = load <4 x i32>, <4 x i32>* %arr
  %min = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %vec)
  ret i32 %min
}
93
; umin_B: unsigned-min reduction of a <16 x i8> vector loaded from %arr.
; Expected codegen: `uminv` with a b-register destination and a .16b source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: umin_B:
; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
  %vec = load <16 x i8>, <16 x i8>* %arr
  %min = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %vec)
  ret i8 %min
}
101
; umin_H: unsigned-min reduction of a <8 x i16> vector loaded from %arr.
; Expected codegen: `uminv` with an h-register destination and a .8h source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: umin_H:
; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
  %vec = load <8 x i16>, <8 x i16>* %arr
  %min = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %vec)
  ret i16 %min
}
109
; umin_S: unsigned-min reduction of a <4 x i32> vector loaded from %arr.
; Expected codegen: `uminv` with an s-register destination and a .4s source.
; The label pattern ends in ':' so it anchors on the function's label line
; rather than on any other output line that merely contains the name.
; CHECK-LABEL: umin_S:
; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
  %vec = load <4 x i32>, <4 x i32>* %arr
  %min = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %vec)
  ret i32 %min
}
117
; fmaxnm_S: floating-point max reduction of a <4 x float> vector loaded from
; %arr. The call carries the `nnan` fast-math flag.
; Expected codegen: `fmaxnmv` with an s-register destination and a .4s source.
; The operands are pinned like the integer reductions in this file, so a
; different arrangement or destination width can no longer pass the test.
; The label pattern ends in ':' so it anchors on the function's label line.
; CHECK-LABEL: fmaxnm_S:
; CHECK: fmaxnmv {{s[0-9]+}}, {{v[0-9]+}}.4s
define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
  %vec = load <4 x float>, <4 x float>* %arr
  %max = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %vec)
  ret float %max
}
125
; fminnm_S: floating-point min reduction of a <4 x float> vector loaded from
; %arr. The call carries the `nnan` fast-math flag.
; Expected codegen: `fminnmv` with an s-register destination and a .4s source.
; The operands are pinned like the integer reductions in this file, so a
; different arrangement or destination width can no longer pass the test.
; The label pattern ends in ':' so it anchors on the function's label line.
; CHECK-LABEL: fminnm_S:
; CHECK: fminnmv {{s[0-9]+}}, {{v[0-9]+}}.4s
define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
  %vec = load <4 x float>, <4 x float>* %arr
  %min = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %vec)
  ret float %min
}
133
declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)

; oversized_umax_256: unsigned-max reduction of a <16 x i16> vector.
; Expected codegen: one vector `umax` combining two .8h operands into V0,
; followed by `umaxv` reading V0.
; The reuse of V0 is followed by its ".8h" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_256:
; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: umaxv {{h[0-9]+}}, [[V0]].8h
  %vec = load <16 x i16>, <16 x i16>* %arr
  %max = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %vec)
  ret i16 %max
}
144
declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)

; oversized_umax_512: unsigned-max reduction of a <16 x i32> vector.
; Expected codegen: three vector `umax` instructions on consecutive lines,
; the last one producing V0 from .4s operands, immediately followed by
; `umaxv` reading V0.
; The reuse of V0 is followed by its ".4s" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_512:
; CHECK: umax v
; CHECK-NEXT: umax v
; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]].4s
  %vec = load <16 x i32>, <16 x i32>* %arr
  %max = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %vec)
  ret i32 %max
}
157
declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)

; oversized_umin_256: unsigned-min reduction of a <16 x i16> vector.
; Expected codegen: one vector `umin` combining two .8h operands into V0,
; followed by `uminv` reading V0.
; The reuse of V0 is followed by its ".8h" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_256:
; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uminv {{h[0-9]+}}, [[V0]].8h
  %vec = load <16 x i16>, <16 x i16>* %arr
  %min = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %vec)
  ret i16 %min
}
168
declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)

; oversized_umin_512: unsigned-min reduction of a <16 x i32> vector.
; Expected codegen: three vector `umin` instructions on consecutive lines,
; the last one producing V0 from .4s operands, immediately followed by
; `uminv` reading V0.
; The reuse of V0 is followed by its ".4s" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_512:
; CHECK: umin v
; CHECK-NEXT: umin v
; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]].4s
  %vec = load <16 x i32>, <16 x i32>* %arr
  %min = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %vec)
  ret i32 %min
}
181
declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)

; oversized_smax_256: signed-max reduction of a <16 x i16> vector.
; Expected codegen: one vector `smax` combining two .8h operands into V0,
; followed by `smaxv` reading V0.
; The reuse of V0 is followed by its ".8h" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_256:
; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: smaxv {{h[0-9]+}}, [[V0]].8h
  %vec = load <16 x i16>, <16 x i16>* %arr
  %max = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %vec)
  ret i16 %max
}
192
declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)

; oversized_smax_512: signed-max reduction of a <16 x i32> vector.
; Expected codegen: three vector `smax` instructions on consecutive lines,
; the last one producing V0 from .4s operands, immediately followed by
; `smaxv` reading V0.
; The reuse of V0 is followed by its ".4s" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_512:
; CHECK: smax v
; CHECK-NEXT: smax v
; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]].4s
  %vec = load <16 x i32>, <16 x i32>* %arr
  %max = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %vec)
  ret i32 %max
}
205
declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)

; oversized_smin_256: signed-min reduction of a <16 x i16> vector.
; Expected codegen: one vector `smin` combining two .8h operands into V0,
; followed by `sminv` reading V0.
; The reuse of V0 is followed by its ".8h" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_256:
; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: sminv {{h[0-9]+}}, [[V0]].8h
  %vec = load <16 x i16>, <16 x i16>* %arr
  %min = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %vec)
  ret i16 %min
}
216
declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)

; oversized_smin_512: signed-min reduction of a <16 x i32> vector.
; Expected codegen: three vector `smin` instructions on consecutive lines,
; the last one producing V0 from .4s operands, immediately followed by
; `sminv` reading V0.
; The reuse of V0 is followed by its ".4s" arrangement so the captured name
; cannot match as a mere prefix of a longer register name (v1 vs. v10).
; The label pattern ends in ':' so it anchors on the function's label line.
define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_512:
; CHECK: smin v
; CHECK-NEXT: smin v
; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]].4s
  %vec = load <16 x i32>, <16 x i32>* %arr
  %min = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %vec)
  ret i32 %min
}
229