1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
2
; smax: lane-wise signed integer maximum of two NEON vectors.
; Each test loads both operands from memory and checks that llc selects the
; smax instruction with the matching arrangement specifier.
define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smax_8b:
;CHECK: smax.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smax_16b:
;CHECK: smax.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smax_4h:
;CHECK: smax.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smax_8h:
;CHECK: smax.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smax_2s:
;CHECK: smax.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smax_4s:
;CHECK: smax.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
63
; umax: lane-wise unsigned integer maximum of two NEON vectors.
define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umax_8b:
;CHECK: umax.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umax_16b:
;CHECK: umax.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umax_4h:
;CHECK: umax.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umax_8h:
;CHECK: umax.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umax_2s:
;CHECK: umax.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umax_4s:
;CHECK: umax.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
124
; smin: lane-wise signed integer minimum of two NEON vectors.
define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smin_8b:
;CHECK: smin.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smin_16b:
;CHECK: smin.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smin_4h:
;CHECK: smin.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smin_8h:
;CHECK: smin.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smin_2s:
;CHECK: smin.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smin_4s:
;CHECK: smin.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
185
; umin: lane-wise unsigned integer minimum of two NEON vectors.
define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umin_8b:
;CHECK: umin.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umin_16b:
;CHECK: umin.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umin_4h:
;CHECK: umin.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umin_8h:
;CHECK: umin.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umin_2s:
;CHECK: umin.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umin_4s:
;CHECK: umin.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
246
; NOTE: a duplicate RUN line (identical to line 1) appeared here, likely from
; concatenating test files; demoted to a plain comment so llc runs only once.
248
; smaxp: pairwise signed integer maximum across two NEON vectors.
define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smaxp_8b:
;CHECK: smaxp.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smaxp_16b:
;CHECK: smaxp.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smaxp_4h:
;CHECK: smaxp.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smaxp_8h:
;CHECK: smaxp.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smaxp_2s:
;CHECK: smaxp.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smaxp_4s:
;CHECK: smaxp.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
309
; umaxp: pairwise unsigned integer maximum across two NEON vectors.
define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umaxp_8b:
;CHECK: umaxp.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umaxp_16b:
;CHECK: umaxp.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umaxp_4h:
;CHECK: umaxp.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umaxp_8h:
;CHECK: umaxp.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umaxp_2s:
;CHECK: umaxp.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umaxp_4s:
;CHECK: umaxp.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
370
; NOTE: a duplicate RUN line (identical to line 1) appeared here, likely from
; concatenating test files; demoted to a plain comment so llc runs only once.
372
; sminp: pairwise signed integer minimum across two NEON vectors.
define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sminp_8b:
;CHECK: sminp.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sminp_16b:
;CHECK: sminp.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sminp_4h:
;CHECK: sminp.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sminp_8h:
;CHECK: sminp.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sminp_2s:
;CHECK: sminp.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sminp_4s:
;CHECK: sminp.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
433
; uminp: pairwise unsigned integer minimum across two NEON vectors.
define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uminp_8b:
;CHECK: uminp.8b
  %x = load <8 x i8>, <8 x i8>* %A
  %y = load <8 x i8>, <8 x i8>* %B
  %r = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %x, <8 x i8> %y)
  ret <8 x i8> %r
}

define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uminp_16b:
;CHECK: uminp.16b
  %x = load <16 x i8>, <16 x i8>* %A
  %y = load <16 x i8>, <16 x i8>* %B
  %r = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %x, <16 x i8> %y)
  ret <16 x i8> %r
}

define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uminp_4h:
;CHECK: uminp.4h
  %x = load <4 x i16>, <4 x i16>* %A
  %y = load <4 x i16>, <4 x i16>* %B
  %r = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %x, <4 x i16> %y)
  ret <4 x i16> %r
}

define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uminp_8h:
;CHECK: uminp.8h
  %x = load <8 x i16>, <8 x i16>* %A
  %y = load <8 x i16>, <8 x i16>* %B
  %r = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %r
}

define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uminp_2s:
;CHECK: uminp.2s
  %x = load <2 x i32>, <2 x i32>* %A
  %y = load <2 x i32>, <2 x i32>* %B
  %r = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %x, <2 x i32> %y)
  ret <2 x i32> %r
}

define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uminp_4s:
;CHECK: uminp.4s
  %x = load <4 x i32>, <4 x i32>* %A
  %y = load <4 x i32>, <4 x i32>* %B
  %r = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %x, <4 x i32> %y)
  ret <4 x i32> %r
}

declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
494
; fmax: lane-wise floating-point maximum of two NEON vectors.
define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmax_2s:
;CHECK: fmax.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %r = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %r
}

define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmax_4s:
;CHECK: fmax.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %r = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %r
}

define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmax_2d:
;CHECK: fmax.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %r = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %r
}

declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
525
; fmaxp: pairwise floating-point maximum across two NEON vectors.
define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmaxp_2s:
;CHECK: fmaxp.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %r = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %r
}

define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmaxp_4s:
;CHECK: fmaxp.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %r = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %r
}

define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmaxp_2d:
;CHECK: fmaxp.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %r = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %r
}

declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
556
; fmin: lane-wise floating-point minimum of two NEON vectors.
define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmin_2s:
;CHECK: fmin.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %r = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %r
}

define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmin_4s:
;CHECK: fmin.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %r = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %r
}

define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmin_2d:
;CHECK: fmin.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %r = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %r
}

declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
587
; fminp: pairwise floating-point minimum across two NEON vectors.
define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fminp_2s:
;CHECK: fminp.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %r = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %r
}

define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fminp_4s:
;CHECK: fminp.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %r = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %r
}

define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fminp_2d:
;CHECK: fminp.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %r = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %r
}

declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
618
; fminnmp: pairwise floating-point minNum (IEEE 754 NaN-propagation-avoiding
; minimum) across two NEON vectors.
define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fminnmp_2s:
;CHECK: fminnmp.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %r = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %r
}

define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fminnmp_4s:
;CHECK: fminnmp.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %r = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %r
}

define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fminnmp_2d:
;CHECK: fminnmp.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %r = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %r
}

declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
649
; fmaxnmp: pairwise floating-point maxNum (IEEE 754 NaN-propagation-avoiding
; maximum) across two NEON vectors.
define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_2s:
;CHECK: fmaxnmp.2s
  %x = load <2 x float>, <2 x float>* %A
  %y = load <2 x float>, <2 x float>* %B
  %r = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %r
}

define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_4s:
;CHECK: fmaxnmp.4s
  %x = load <4 x float>, <4 x float>* %A
  %y = load <4 x float>, <4 x float>* %B
  %r = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %x, <4 x float> %y)
  ret <4 x float> %r
}

define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_2d:
;CHECK: fmaxnmp.2d
  %x = load <2 x double>, <2 x double>* %A
  %y = load <2 x double>, <2 x double>* %B
  %r = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %r
}

declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
680