; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP

; Fast-math fmin reduction over <2 x float>.
; Both RUN lines share the CHECK prefix here: a single scalar vminnm.f32 on
; s0/s1 is expected.
define arm_aapcs_vfpcc float @fmin_v2f32(<2 x float> %x) {
; CHECK-LABEL: fmin_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  ret float %z
}

; Fast-math fmin reduction over <4 x float>.
; CHECK-FP (+mve.fp) expects a pairwise tree of scalar vminnm.f32:
; (s2,s3) and (s0,s1), then the two partial results.
; CHECK-NOFP (+mve,+fullfp16) expects a linear chain through s0, s1, s2, s3.
define arm_aapcs_vfpcc float @fmin_v4f32(<4 x float> %x) {
; CHECK-FP-LABEL: fmin_v4f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s3
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  ret float %z
}

; FIXME: Without MVE FP the leading vector fminnum below is expanded to vcmp/vselgt; should it be scalarised to scalar fminnum (vminnm) instead?
; Fast-math fmin reduction over <8 x float> (passed in q0 and q1).
; CHECK-FP: one vector vminnm.f32 folds q1 into q0, followed by the same
; scalar tree as the <4 x float> case.
; CHECK-NOFP: the q0/q1 fold is expected as four vcmp.f32/vselgt.f32 pairs,
; followed by a chain of three scalar vminnm.f32.
define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
; CHECK-FP-LABEL: fmin_v8f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vcmp.f32 s5, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f32 s8, s1, s5
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f32 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f32 s12, s2, s6
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s2, s12
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s2, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  ret float %z
}

; Fast-math fmin reduction over <4 x half>.
; Both configurations are expected to use vmovx.f16 to reach the upper half
; of s0/s1 and then three scalar vminnm.f16.
; CHECK-FP pairs the lanes as a tree; CHECK-NOFP reduces them as a chain.
define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) {
; CHECK-FP-LABEL: fmin_v4f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  ret half %z
}

; Fast-math fmin reduction over <8 x half>.
; CHECK-FP: vrev32.16 plus one vector vminnm.f16, then a scalar vminnm.f16
; tree over s0..s3.
; CHECK-NOFP: fully scalar; a chain of seven vminnm.f16 with vmovx.f16 used to
; reach the upper half of each s register.
define arm_aapcs_vfpcc half @fmin_v8f16(<8 x half> %x) {
; CHECK-FP-LABEL: fmin_v8f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  ret half %z
}

; Fast-math fmin reduction over <16 x half> (passed in q0 and q1).
; CHECK-FP: a vector vminnm.f16 folds q1 into q0, then the same sequence as
; the <8 x half> case.
; CHECK-NOFP: the q0/q1 fold is expected as eight vcmp.f16/vselgt.f16 pairs,
; interleaved with a chain of seven scalar vminnm.f16.
define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
; CHECK-FP-LABEL: fmin_v16f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s5, s1
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  ret half %z
}

; Fast-math fmin reduction over a single-element <1 x double>.
; Nothing needs to be computed: only the return is expected.
define arm_aapcs_vfpcc double @fmin_v1f64(<1 x double> %x) {
; CHECK-LABEL: fmin_v1f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  ret double %z
}

; Fast-math fmin reduction over <2 x double>.
; Both configurations expect a single scalar vminnm.f64 on d0/d1.
define arm_aapcs_vfpcc double @fmin_v2f64(<2 x double> %x) {
; CHECK-LABEL: fmin_v2f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  ret double %z
}

; Fast-math fmin reduction over <4 x double> (d0..d3).
; Both configurations expect the first fold (d2/d3 into d0/d1) as two
; vcmp.f64/vselgt.f64 pairs, then one scalar vminnm.f64.
define arm_aapcs_vfpcc double @fmin_v4f64(<4 x double> %x) {
; CHECK-LABEL: fmin_v4f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmp.f64 d3, d1
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vcmp.f64 d2, d0
; CHECK-NEXT:    vselgt.f64 d4, d1, d3
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d4
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  ret double %z
}

; fmin reduction over <2 x float> without fast-math flags on the call.
; Both configurations expect a single scalar vminnm.f32 on s0/s1.
define arm_aapcs_vfpcc float @fmin_v2f32_nofast(<2 x float> %x) {
; CHECK-LABEL: fmin_v2f32_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  ret float %z
}

; fmin reduction over <4 x float> without fast-math flags on the call.
; CHECK-FP expects a pairwise tree of scalar vminnm.f32; CHECK-NOFP expects a
; linear chain through s0, s1, s2, s3.
define arm_aapcs_vfpcc float @fmin_v4f32_nofast(<4 x float> %x) {
; CHECK-FP-LABEL: fmin_v4f32_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s3
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  ret float %z
}

; fmin reduction over <8 x float> without fast-math flags on the call.
; CHECK-FP: one vector vminnm.f32 folds q1 into q0, then a scalar tree.
; CHECK-NOFP: seven scalar vminnm.f32 and no vcmp/vselgt (compare with the
; fast-math variant of this test).
define arm_aapcs_vfpcc float @fmin_v8f32_nofast(<8 x float> %x) {
; CHECK-FP-LABEL: fmin_v8f32_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f32 s8, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f32 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f32 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  ret float %z
}

; fmin reduction over <4 x half> without fast-math flags on the call.
; Both configurations expect vmovx.f16 lane extraction and three scalar
; vminnm.f16; CHECK-FP as a tree, CHECK-NOFP as a chain.
define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
; CHECK-FP-LABEL: fmin_v4f16_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  ret half %z
}

; fmin reduction over <8 x half> without fast-math flags on the call.
; CHECK-FP: vrev32.16 plus one vector vminnm.f16, then a scalar tree.
; CHECK-NOFP: fully scalar chain of seven vminnm.f16.
define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
; CHECK-FP-LABEL: fmin_v8f16_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  ret half %z
}

; fmin reduction over <16 x half> without fast-math flags on the call.
; CHECK-FP: a vector vminnm.f16 folds q1 into q0, then the <8 x half> sequence.
; CHECK-NOFP: fifteen scalar vminnm.f16 and no vcmp/vselgt (compare with the
; fast-math variant of this test).
define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
; CHECK-FP-LABEL: fmin_v16f16_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  ret half %z
}

; fmin reduction over a single-element <1 x double>, no fast-math flags.
; Nothing needs to be computed: only the return is expected.
define arm_aapcs_vfpcc double @fmin_v1f64_nofast(<1 x double> %x) {
; CHECK-LABEL: fmin_v1f64_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  ret double %z
}

; fmin reduction over <2 x double> without fast-math flags on the call.
; Both configurations expect a single scalar vminnm.f64 on d0/d1.
define arm_aapcs_vfpcc double @fmin_v2f64_nofast(<2 x double> %x) {
; CHECK-LABEL: fmin_v2f64_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  ret double %z
}

; fmin reduction over <4 x double> without fast-math flags on the call.
; Both configurations expect three scalar vminnm.f64 and no vcmp/vselgt
; (compare with the fast-math variant of this test).
define arm_aapcs_vfpcc double @fmin_v4f64_nofast(<4 x double> %x) {
; CHECK-LABEL: fmin_v4f64_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d4, d1, d3
; CHECK-NEXT:    vminnm.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d4
; CHECK-NEXT:    bx lr
entry:
  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  ret double %z
}

; Fast-math fmin reduction over <2 x float>, then combined with the scalar %y.
; The fast compare+select is expected to fold into one extra vminnm.f32.
define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
; CHECK-LABEL: fmin_v2f32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    vminnm.f32 s0, s4, s0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; Fast-math fmin reduction over <4 x float>, then combined with the scalar %y.
; Each configuration expects its usual reduction (tree for CHECK-FP, chain for
; CHECK-NOFP) followed by one extra vminnm.f32 for the compare+select.
define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v4f32_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s6, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f32 s0, s4, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s6, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s6, s3
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; Fast-math fmin reduction over <8 x float>, then combined with the scalar %y.
; CHECK-FP: vector vminnm.f32 fold, scalar tree, then one extra vminnm.f32.
; CHECK-NOFP: four vcmp.f32/vselgt.f32 pairs for the q0/q1 fold, a scalar
; vminnm.f32 chain, then one extra vminnm.f32 for the compare+select.
define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v8f32_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f32 s0, s8, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vcmp.f32 s5, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f32 s10, s1, s5
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f32 s12, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f32 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f32 s14, s2, s6
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s12, s10
; CHECK-NOFP-NEXT:    vminnm.f32 s2, s2, s14
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s2, s0
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; Fast-math fmin reduction over <4 x half>, combined with a half loaded from
; %yy; the result is stored back to %yy.
; Expected: vldr.16 of the accumulator, the usual <4 x half> reduction, one
; extra vminnm.f16 for the compare+select, then vstr.16.
define arm_aapcs_vfpcc void @fmin_v4f16_acc(<4 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v4f16_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; Fast-math fmin reduction over <2 x half>, combined with a half loaded from
; %yy; the result is stored back to %yy.
; Both configurations expect one vmovx.f16, one vminnm.f16 for the reduction
; and one vminnm.f16 for the compare+select.
define arm_aapcs_vfpcc void @fmin_v2f16_acc(<2 x half> %x, half* %yy) {
; CHECK-LABEL: fmin_v2f16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s0
; CHECK-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NEXT:    vstr.16 s0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; Fast-math fmin reduction over <8 x half>, combined with a half loaded from
; %yy; the result is stored back to %yy.
; Expected: the usual <8 x half> reduction for each configuration, plus one
; extra vminnm.f16 for the compare+select between vldr.16 and vstr.16.
define arm_aapcs_vfpcc void @fmin_v8f16_acc(<8 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v8f16_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; Fast-math fmin reduction over <16 x half>, combined with a half loaded from
; %yy; the result is stored back to %yy.
; CHECK-FP: vector fold of q1 into q0, the <8 x half> sequence, then one extra
; vminnm.f16 for the compare+select.
; CHECK-NOFP: vcmp.f16/vselgt.f16 pairs for the q0/q1 fold interleaved with a
; scalar vminnm.f16 chain, then one extra vminnm.f16 for the compare+select.
define arm_aapcs_vfpcc void @fmin_v16f16_acc(<16 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v16f16_acc:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_acc:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s5, s1
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s6, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vcmp.f16 s10, s12
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s7, s3
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vcmp.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; Fast-math fmin reduction over <1 x double>, then combined with the scalar %y.
; The reduction itself needs no code; the fast compare+select is expected to
; become a single vminnm.f64.
define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
; CHECK-LABEL: fmin_v1f64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

; Fast-math fmin reduction over <2 x double>, then combined with the scalar %y.
; Expected: one vminnm.f64 for the reduction and one for the compare+select.
define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
; CHECK-LABEL: fmin_v2f64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f64 d0, d0, d1
; CHECK-NEXT:    vminnm.f64 d0, d2, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

; Fast-math fmin reduction over <4 x double>, then combined with the scalar %y.
; Expected: two vcmp.f64/vselgt.f64 pairs for the first fold, one vminnm.f64
; to finish the reduction, and one more vminnm.f64 for the compare+select.
define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
; CHECK-LABEL: fmin_v4f64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmp.f64 d3, d1
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vcmp.f64 d2, d0
; CHECK-NEXT:    vselgt.f64 d5, d1, d3
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f64 d0, d0, d2
; CHECK-NEXT:    vminnm.f64 d0, d0, d5
; CHECK-NEXT:    vminnm.f64 d0, d4, d0
; CHECK-NEXT:    bx lr
entry:
  %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
  ; min(%y, %z) spelled as an ordered less-than compare feeding a select.
  %c = fcmp fast olt double %y, %z
  %r = select i1 %c, double %y, double %z
  ret double %r
}

; fmin reduction over <2 x float> combined with the scalar %y, with no
; fast-math flags on the call, compare or select.
; The reduction is one vminnm.f32; the compare+select is expected to stay as
; vcmp.f32 / vmrs / vselgt.f32 rather than becoming a vminnm.
define arm_aapcs_vfpcc float @fmin_v2f32_acc_nofast(<2 x float> %x, float %y) {
; CHECK-LABEL: fmin_v2f32_acc_nofast:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-NEXT:    vcmp.f32 s0, s4
; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NEXT:    vselgt.f32 s0, s4, s0
; CHECK-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
  ; Ordered less-than compare feeding a select, without fast-math flags.
  %c = fcmp olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; fmin reduction over <4 x float> combined with the scalar %y, with no
; fast-math flags on the call, compare or select.
; Each configuration expects its usual reduction (tree for CHECK-FP, chain for
; CHECK-NOFP) followed by vcmp.f32 / vmrs / vselgt.f32 for the compare+select.
define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v4f32_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 s6, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s6
; CHECK-FP-NEXT:    vcmp.f32 s0, s4
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f32 s0, s4, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f32_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s0, s1
; CHECK-NOFP-NEXT:    vminnm.f32 s6, s6, s2
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s6, s3
; CHECK-NOFP-NEXT:    vcmp.f32 s0, s4
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s4, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
  ; Ordered less-than compare feeding a select, without fast-math flags.
  %c = fcmp olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; fmin reduction over <8 x float> combined with the scalar %y, with no
; fast-math flags on the call, compare or select.
; CHECK-FP: vector vminnm.f32 fold and scalar tree; CHECK-NOFP: seven scalar
; vminnm.f32. Both end with vcmp.f32 / vmrs / vselgt.f32 for the
; compare+select.
define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) {
; CHECK-FP-LABEL: fmin_v8f32_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f32 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f32 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s1
; CHECK-FP-NEXT:    vminnm.f32 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f32 s0, s8
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f32 s0, s8, s0
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vminnm.f32 s12, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s12, s10
; CHECK-NOFP-NEXT:    vminnm.f32 s12, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f32 s10, s10, s12
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f32 s0, s10, s0
; CHECK-NOFP-NEXT:    vcmp.f32 s0, s8
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f32 s0, s8, s0
; CHECK-NOFP-NEXT:    bx lr
entry:
  %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
  ; Ordered less-than compare feeding a select, without fast-math flags.
  %c = fcmp olt float %y, %z
  %r = select i1 %c, float %y, float %z
  ret float %r
}

; fmin reduction over <4 x half> combined with a half loaded from %yy, with no
; fast-math flags; the result is stored back to %yy.
; Expected: the usual <4 x half> reduction, then vcmp.f16 / vmrs / vselgt.f16
; for the compare+select between vldr.16 and vstr.16.
define arm_aapcs_vfpcc void @fmin_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v4f16_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmovx.f16 s4, s1
; CHECK-FP-NEXT:    vmovx.f16 s6, s0
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s6
; CHECK-FP-NEXT:    vminnm.f16 s4, s1, s4
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f16 s0, s2
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v4f16_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
  ; Ordered less-than compare feeding a select, without fast-math flags.
  %c = fcmp olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; fmin reduction over <8 x half> combined with a half loaded from %yy, with no
; fast-math flags; the result is stored back to %yy.
; Expected: the usual <8 x half> reduction for each configuration, then
; vcmp.f16 / vmrs / vselgt.f16 for the compare+select.
define arm_aapcs_vfpcc void @fmin_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v8f16_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f16 s0, s2
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v8f16_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s0, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s1
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s2
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s4, s4, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s4, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
  ; Ordered less-than compare feeding a select, without fast-math flags.
  %c = fcmp olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; fmin reduction over <16 x half> combined with a half loaded from %yy, with
; no fast-math flags; the result is stored back to %yy.
; CHECK-FP: vector fold of q1 into q0 and the <8 x half> sequence.
; CHECK-NOFP: fifteen scalar vminnm.f16.
; Both end with vcmp.f16 / vmrs / vselgt.f16 for the compare+select.
define arm_aapcs_vfpcc void @fmin_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
; CHECK-FP-LABEL: fmin_v16f16_acc_nofast:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vrev32.16 q1, q0
; CHECK-FP-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-FP-NEXT:    vminnm.f16 s4, s2, s3
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s1
; CHECK-FP-NEXT:    vldr.16 s2, [r0]
; CHECK-FP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-FP-NEXT:    vcmp.f16 s0, s2
; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-FP-NEXT:    vstr.16 s0, [r0]
; CHECK-FP-NEXT:    bx lr
;
; CHECK-NOFP-LABEL: fmin_v16f16_acc_nofast:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s0, s4
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s1, s5
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s2, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s10, s3, s7
; CHECK-NOFP-NEXT:    vminnm.f16 s8, s8, s10
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
; CHECK-NOFP-NEXT:    vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT:    vcmp.f16 s0, s2
; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
; CHECK-NOFP-NEXT:    bx lr
entry:
  %y = load half, half* %yy
  %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
  ; Ordered less-than compare feeding a select, without fast-math flags.
  %c = fcmp olt half %y, %z
  %r = select i1 %c, half %y, half %z
  store half %r, half* %yy
  ret void
}

; Min-reduce a single-element <1 x double> vector (no fast-math flags) and
; combine it with the scalar %y via an ordered less-than compare plus select.
; Expected output (shared by both runs): no vminnm is emitted for the one-lane
; reduction; only the vcmp.f64 / vmrs / vselgt.f64 accumulate step remains.
874define arm_aapcs_vfpcc double @fmin_v1f64_acc_nofast(<1 x double> %x, double %y) {
875; CHECK-LABEL: fmin_v1f64_acc_nofast:
876; CHECK:       @ %bb.0: @ %entry
877; CHECK-NEXT:    vcmp.f64 d0, d1
878; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
879; CHECK-NEXT:    vselgt.f64 d0, d1, d0
880; CHECK-NEXT:    bx lr
881entry:
882  %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
883  %c = fcmp olt double %y, %z
884  %r = select i1 %c, double %y, double %z
885  ret double %r
886}
887
; Min-reduce a <2 x double> vector (no fast-math flags) and combine it with the
; scalar %y via an ordered less-than compare plus select.
; Expected output (shared by both runs): one scalar vminnm.f64 for the two
; lanes, followed by vcmp.f64 / vmrs / vselgt.f64 against d2.
888define arm_aapcs_vfpcc double @fmin_v2f64_acc_nofast(<2 x double> %x, double %y) {
889; CHECK-LABEL: fmin_v2f64_acc_nofast:
890; CHECK:       @ %bb.0: @ %entry
891; CHECK-NEXT:    vminnm.f64 d0, d0, d1
892; CHECK-NEXT:    vcmp.f64 d0, d2
893; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
894; CHECK-NEXT:    vselgt.f64 d0, d2, d0
895; CHECK-NEXT:    bx lr
896entry:
897  %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
898  %c = fcmp olt double %y, %z
899  %r = select i1 %c, double %y, double %z
900  ret double %r
901}
902
; Min-reduce a <4 x double> vector (no fast-math flags) and combine it with the
; scalar %y via an ordered less-than compare plus select.
; Expected output (shared by both runs): three scalar vminnm.f64 instructions
; for the four lanes, followed by vcmp.f64 / vmrs / vselgt.f64 against d4.
903define arm_aapcs_vfpcc double @fmin_v4f64_acc_nofast(<4 x double> %x, double %y) {
904; CHECK-LABEL: fmin_v4f64_acc_nofast:
905; CHECK:       @ %bb.0: @ %entry
906; CHECK-NEXT:    vminnm.f64 d5, d1, d3
907; CHECK-NEXT:    vminnm.f64 d0, d0, d2
908; CHECK-NEXT:    vminnm.f64 d0, d0, d5
909; CHECK-NEXT:    vcmp.f64 d0, d4
910; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
911; CHECK-NEXT:    vselgt.f64 d0, d4, d0
912; CHECK-NEXT:    bx lr
913entry:
914  %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
915  %c = fcmp olt double %y, %z
916  %r = select i1 %c, double %y, double %z
917  ret double %r
918}
919
; Max-reduce a <2 x float> vector with the `fast` flag on the intrinsic call and
; return the scalar result.
; Expected output (shared by both runs): a single scalar vmaxnm.f32 of s0 and s1.
920define arm_aapcs_vfpcc float @fmax_v2f32(<2 x float> %x) {
921; CHECK-LABEL: fmax_v2f32:
922; CHECK:       @ %bb.0: @ %entry
923; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
924; CHECK-NEXT:    bx lr
925entry:
926  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
927  ret float %z
928}
929
; Max-reduce a <4 x float> vector with the `fast` flag on the intrinsic call and
; return the scalar result.
; Expected output: three scalar vmaxnm.f32 in both runs. The +mve.fp run pairs
; (s2,s3) and (s0,s1) before combining; the +mve,+fullfp16 run folds the lanes
; in order s0..s3.
930define arm_aapcs_vfpcc float @fmax_v4f32(<4 x float> %x) {
931; CHECK-FP-LABEL: fmax_v4f32:
932; CHECK-FP:       @ %bb.0: @ %entry
933; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
934; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
935; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
936; CHECK-FP-NEXT:    bx lr
937;
938; CHECK-NOFP-LABEL: fmax_v4f32:
939; CHECK-NOFP:       @ %bb.0: @ %entry
940; CHECK-NOFP-NEXT:    vmaxnm.f32 s4, s0, s1
941; CHECK-NOFP-NEXT:    vmaxnm.f32 s4, s4, s2
942; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s3
943; CHECK-NOFP-NEXT:    bx lr
944entry:
945  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
946  ret float %z
947}
948
; Max-reduce an <8 x float> vector (passed in q0/q1) with the `fast` flag on the
; intrinsic call and return the scalar result.
; Expected output: the +mve.fp run first combines q0 and q1 with a vector
; vmaxnm.f32 and then reduces with three scalar vmaxnm.f32. The +mve,+fullfp16
; run combines matching lanes of q0/q1 with four vcmp.f32 / vmrs / vselgt.f32
; groups and then reduces with three scalar vmaxnm.f32.
949define arm_aapcs_vfpcc float @fmax_v8f32(<8 x float> %x) {
950; CHECK-FP-LABEL: fmax_v8f32:
951; CHECK-FP:       @ %bb.0: @ %entry
952; CHECK-FP-NEXT:    vmaxnm.f32 q0, q0, q1
953; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
954; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
955; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
956; CHECK-FP-NEXT:    bx lr
957;
958; CHECK-NOFP-LABEL: fmax_v8f32:
959; CHECK-NOFP:       @ %bb.0: @ %entry
960; CHECK-NOFP-NEXT:    vcmp.f32 s1, s5
961; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
962; CHECK-NOFP-NEXT:    vcmp.f32 s0, s4
963; CHECK-NOFP-NEXT:    vselgt.f32 s8, s1, s5
964; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
965; CHECK-NOFP-NEXT:    vcmp.f32 s2, s6
966; CHECK-NOFP-NEXT:    vselgt.f32 s10, s0, s4
967; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
968; CHECK-NOFP-NEXT:    vcmp.f32 s3, s7
969; CHECK-NOFP-NEXT:    vselgt.f32 s12, s2, s6
970; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
971; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
972; CHECK-NOFP-NEXT:    vmaxnm.f32 s2, s10, s8
973; CHECK-NOFP-NEXT:    vmaxnm.f32 s2, s2, s12
974; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s2, s0
975; CHECK-NOFP-NEXT:    bx lr
976entry:
977  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
978  ret float %z
979}
980
; Max-reduce a <4 x half> vector with the `fast` flag on the intrinsic call and
; return the scalar result.
; Expected output: both runs use vmovx.f16 to reach the upper halves of s0/s1
; and three scalar vmaxnm.f16; the +mve.fp run reduces each s-register pairwise
; first, the +mve,+fullfp16 run folds the lanes in a single chain.
981define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) {
982; CHECK-FP-LABEL: fmax_v4f16:
983; CHECK-FP:       @ %bb.0: @ %entry
984; CHECK-FP-NEXT:    vmovx.f16 s4, s1
985; CHECK-FP-NEXT:    vmovx.f16 s6, s0
986; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
987; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
988; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
989; CHECK-FP-NEXT:    bx lr
990;
991; CHECK-NOFP-LABEL: fmax_v4f16:
992; CHECK-NOFP:       @ %bb.0: @ %entry
993; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
994; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
995; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
996; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
997; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
998; CHECK-NOFP-NEXT:    bx lr
999entry:
1000  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1001  ret half %z
1002}
1003
; Max-reduce an <8 x half> vector with the `fast` flag on the intrinsic call and
; return the scalar result.
; Expected output: the +mve.fp run uses vrev32.16 plus a vector vmaxnm.f16 to
; combine adjacent half lanes, then three scalar vmaxnm.f16. The +mve,+fullfp16
; run is a chain of seven scalar vmaxnm.f16 fed by vmovx.f16 extracts.
1004define arm_aapcs_vfpcc half @fmax_v8f16(<8 x half> %x) {
1005; CHECK-FP-LABEL: fmax_v8f16:
1006; CHECK-FP:       @ %bb.0: @ %entry
1007; CHECK-FP-NEXT:    vrev32.16 q1, q0
1008; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1009; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1010; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1011; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1012; CHECK-FP-NEXT:    bx lr
1013;
1014; CHECK-NOFP-LABEL: fmax_v8f16:
1015; CHECK-NOFP:       @ %bb.0: @ %entry
1016; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1017; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
1018; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1019; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1020; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1021; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1022; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
1023; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
1024; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1025; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
1026; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1027; CHECK-NOFP-NEXT:    bx lr
1028entry:
1029  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1030  ret half %z
1031}
1032
; Max-reduce a <16 x half> vector (passed in q0/q1) with the `fast` flag on the
; intrinsic call and return the scalar result.
; Expected output: the +mve.fp run combines q0/q1 with a vector vmaxnm.f16,
; then uses vrev32.16 plus a second vector vmaxnm.f16, then three scalar
; vmaxnm.f16. The +mve,+fullfp16 run combines matching lanes of q0/q1 with
; vcmp.f16 / vmrs / vselgt.f16 groups (using vmovx.f16 for the upper halves)
; interleaved with a scalar vmaxnm.f16 accumulation chain in s8.
1033define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
1034; CHECK-FP-LABEL: fmax_v16f16:
1035; CHECK-FP:       @ %bb.0: @ %entry
1036; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1037; CHECK-FP-NEXT:    vrev32.16 q1, q0
1038; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1039; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1040; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1041; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1042; CHECK-FP-NEXT:    bx lr
1043;
1044; CHECK-NOFP-LABEL: fmax_v16f16:
1045; CHECK-NOFP:       @ %bb.0: @ %entry
1046; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
1047; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
1048; CHECK-NOFP-NEXT:    vcmp.f16 s10, s8
1049; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
1050; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1051; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
1052; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
1053; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1054; CHECK-NOFP-NEXT:    vcmp.f16 s1, s5
1055; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
1056; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1057; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
1058; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
1059; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1060; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
1061; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1062; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
1063; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
1064; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1065; CHECK-NOFP-NEXT:    vcmp.f16 s2, s6
1066; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
1067; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1068; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1069; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
1070; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
1071; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1072; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
1073; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
1074; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1075; CHECK-NOFP-NEXT:    vcmp.f16 s3, s7
1076; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
1077; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1078; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
1079; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1080; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
1081; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1082; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1083; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
1084; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
1085; CHECK-NOFP-NEXT:    bx lr
1086entry:
1087  %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1088  ret half %z
1089}
1090
; Max-reduce a single-element <1 x double> vector with the `fast` flag on the
; intrinsic call and return the scalar result.
; Expected output (shared by both runs): the function body is just `bx lr`; no
; arithmetic is emitted for the one-lane reduction.
1091define arm_aapcs_vfpcc double @fmax_v1f64(<1 x double> %x) {
1092; CHECK-LABEL: fmax_v1f64:
1093; CHECK:       @ %bb.0: @ %entry
1094; CHECK-NEXT:    bx lr
1095entry:
1096  %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1097  ret double %z
1098}
1099
; Max-reduce a <2 x double> vector with the `fast` flag on the intrinsic call
; and return the scalar result.
; Expected output (shared by both runs): a single scalar vmaxnm.f64 of d0 and d1.
1100define arm_aapcs_vfpcc double @fmax_v2f64(<2 x double> %x) {
1101; CHECK-LABEL: fmax_v2f64:
1102; CHECK:       @ %bb.0: @ %entry
1103; CHECK-NEXT:    vmaxnm.f64 d0, d0, d1
1104; CHECK-NEXT:    bx lr
1105entry:
1106  %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1107  ret double %z
1108}
1109
; Max-reduce a <4 x double> vector with the `fast` flag on the intrinsic call
; and return the scalar result.
; Expected output (shared by both runs): the two halves of the input are
; combined lane-wise with two vcmp.f64 / vmrs / vselgt.f64 groups, and the two
; survivors are reduced with one scalar vmaxnm.f64.
1110define arm_aapcs_vfpcc double @fmax_v4f64(<4 x double> %x) {
1111; CHECK-LABEL: fmax_v4f64:
1112; CHECK:       @ %bb.0: @ %entry
1113; CHECK-NEXT:    vcmp.f64 d1, d3
1114; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1115; CHECK-NEXT:    vcmp.f64 d0, d2
1116; CHECK-NEXT:    vselgt.f64 d4, d1, d3
1117; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1118; CHECK-NEXT:    vselgt.f64 d0, d0, d2
1119; CHECK-NEXT:    vmaxnm.f64 d0, d0, d4
1120; CHECK-NEXT:    bx lr
1121entry:
1122  %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1123  ret double %z
1124}
1125
; Max-reduce a <2 x float> vector with no fast-math flags on the intrinsic call
; and return the scalar result.
; Expected output (shared by both runs): a single scalar vmaxnm.f32 of s0 and s1.
1126define arm_aapcs_vfpcc float @fmax_v2f32_nofast(<2 x float> %x) {
1127; CHECK-LABEL: fmax_v2f32_nofast:
1128; CHECK:       @ %bb.0: @ %entry
1129; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
1130; CHECK-NEXT:    bx lr
1131entry:
1132  %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1133  ret float %z
1134}
1135
; Max-reduce a <4 x float> vector with no fast-math flags on the intrinsic call
; and return the scalar result.
; Expected output: three scalar vmaxnm.f32 in both runs. The +mve.fp run pairs
; (s2,s3) and (s0,s1) before combining; the +mve,+fullfp16 run folds the lanes
; in order s0..s3.
1136define arm_aapcs_vfpcc float @fmax_v4f32_nofast(<4 x float> %x) {
1137; CHECK-FP-LABEL: fmax_v4f32_nofast:
1138; CHECK-FP:       @ %bb.0: @ %entry
1139; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
1140; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
1141; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
1142; CHECK-FP-NEXT:    bx lr
1143;
1144; CHECK-NOFP-LABEL: fmax_v4f32_nofast:
1145; CHECK-NOFP:       @ %bb.0: @ %entry
1146; CHECK-NOFP-NEXT:    vmaxnm.f32 s4, s0, s1
1147; CHECK-NOFP-NEXT:    vmaxnm.f32 s4, s4, s2
1148; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s3
1149; CHECK-NOFP-NEXT:    bx lr
1150entry:
1151  %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1152  ret float %z
1153}
1154
; Max-reduce an <8 x float> vector (passed in q0/q1) with no fast-math flags on
; the intrinsic call and return the scalar result.
; Expected output: the +mve.fp run combines q0 and q1 with a vector vmaxnm.f32
; and then reduces with three scalar vmaxnm.f32. The +mve,+fullfp16 run uses
; seven scalar vmaxnm.f32 only (matching lanes of q0/q1 are combined with
; vmaxnm.f32; no vcmp/vselgt sequence is expected here).
1155define arm_aapcs_vfpcc float @fmax_v8f32_nofast(<8 x float> %x) {
1156; CHECK-FP-LABEL: fmax_v8f32_nofast:
1157; CHECK-FP:       @ %bb.0: @ %entry
1158; CHECK-FP-NEXT:    vmaxnm.f32 q0, q0, q1
1159; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
1160; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
1161; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
1162; CHECK-FP-NEXT:    bx lr
1163;
1164; CHECK-NOFP-LABEL: fmax_v8f32_nofast:
1165; CHECK-NOFP:       @ %bb.0: @ %entry
1166; CHECK-NOFP-NEXT:    vmaxnm.f32 s10, s0, s4
1167; CHECK-NOFP-NEXT:    vmaxnm.f32 s8, s1, s5
1168; CHECK-NOFP-NEXT:    vmaxnm.f32 s8, s10, s8
1169; CHECK-NOFP-NEXT:    vmaxnm.f32 s10, s2, s6
1170; CHECK-NOFP-NEXT:    vmaxnm.f32 s8, s8, s10
1171; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s3, s7
1172; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s8, s0
1173; CHECK-NOFP-NEXT:    bx lr
1174entry:
1175  %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1176  ret float %z
1177}
1178
; Max-reduce a <4 x half> vector with no fast-math flags on the intrinsic call
; and return the scalar result.
; Expected output: both runs use vmovx.f16 to reach the upper halves of s0/s1
; and three scalar vmaxnm.f16; the +mve.fp run reduces each s-register pairwise
; first, the +mve,+fullfp16 run folds the lanes in a single chain.
1179define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
1180; CHECK-FP-LABEL: fmax_v4f16_nofast:
1181; CHECK-FP:       @ %bb.0: @ %entry
1182; CHECK-FP-NEXT:    vmovx.f16 s4, s1
1183; CHECK-FP-NEXT:    vmovx.f16 s6, s0
1184; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
1185; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
1186; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1187; CHECK-FP-NEXT:    bx lr
1188;
1189; CHECK-NOFP-LABEL: fmax_v4f16_nofast:
1190; CHECK-NOFP:       @ %bb.0: @ %entry
1191; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1192; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1193; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
1194; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1195; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1196; CHECK-NOFP-NEXT:    bx lr
1197entry:
1198  %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1199  ret half %z
1200}
1201
; Max-reduce an <8 x half> vector with no fast-math flags on the intrinsic call
; and return the scalar result.
; Expected output: the +mve.fp run uses vrev32.16 plus a vector vmaxnm.f16 to
; combine adjacent half lanes, then three scalar vmaxnm.f16. The +mve,+fullfp16
; run is a chain of seven scalar vmaxnm.f16 fed by vmovx.f16 extracts.
1202define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) {
1203; CHECK-FP-LABEL: fmax_v8f16_nofast:
1204; CHECK-FP:       @ %bb.0: @ %entry
1205; CHECK-FP-NEXT:    vrev32.16 q1, q0
1206; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1207; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1208; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1209; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1210; CHECK-FP-NEXT:    bx lr
1211;
1212; CHECK-NOFP-LABEL: fmax_v8f16_nofast:
1213; CHECK-NOFP:       @ %bb.0: @ %entry
1214; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1215; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
1216; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1217; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1218; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1219; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1220; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
1221; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
1222; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1223; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
1224; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1225; CHECK-NOFP-NEXT:    bx lr
1226entry:
1227  %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1228  ret half %z
1229}
1230
; Max-reduce a <16 x half> vector (passed in q0/q1) with no fast-math flags on
; the intrinsic call and return the scalar result.
; Expected output: the +mve.fp run combines q0/q1 with a vector vmaxnm.f16,
; then uses vrev32.16 plus a second vector vmaxnm.f16, then three scalar
; vmaxnm.f16. The +mve,+fullfp16 run uses only scalar vmaxnm.f16 (fed by
; vmovx.f16 extracts): matching lanes of q0/q1 are combined and accumulated
; into s8; no vcmp/vselgt sequence is expected here.
1231define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) {
1232; CHECK-FP-LABEL: fmax_v16f16_nofast:
1233; CHECK-FP:       @ %bb.0: @ %entry
1234; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1235; CHECK-FP-NEXT:    vrev32.16 q1, q0
1236; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1237; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1238; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1239; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1240; CHECK-FP-NEXT:    bx lr
1241;
1242; CHECK-NOFP-LABEL: fmax_v16f16_nofast:
1243; CHECK-NOFP:       @ %bb.0: @ %entry
1244; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
1245; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
1246; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
1247; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s0, s4
1248; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
1249; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s1, s5
1250; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1251; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
1252; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
1253; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
1254; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
1255; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
1256; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1257; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s2, s6
1258; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1259; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
1260; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
1261; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1262; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1263; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s3, s7
1264; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1265; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s0, s4
1266; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
1267; CHECK-NOFP-NEXT:    bx lr
1268entry:
1269  %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1270  ret half %z
1271}
1272
; Max-reduce a single-element <1 x double> vector with no fast-math flags on the
; intrinsic call and return the scalar result.
; Expected output (shared by both runs): the function body is just `bx lr`; no
; arithmetic is emitted for the one-lane reduction.
1273define arm_aapcs_vfpcc double @fmax_v1f64_nofast(<1 x double> %x) {
1274; CHECK-LABEL: fmax_v1f64_nofast:
1275; CHECK:       @ %bb.0: @ %entry
1276; CHECK-NEXT:    bx lr
1277entry:
1278  %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1279  ret double %z
1280}
1281
; Max-reduce a <2 x double> vector with no fast-math flags on the intrinsic call
; and return the scalar result.
; Expected output (shared by both runs): a single scalar vmaxnm.f64 of d0 and d1.
1282define arm_aapcs_vfpcc double @fmax_v2f64_nofast(<2 x double> %x) {
1283; CHECK-LABEL: fmax_v2f64_nofast:
1284; CHECK:       @ %bb.0: @ %entry
1285; CHECK-NEXT:    vmaxnm.f64 d0, d0, d1
1286; CHECK-NEXT:    bx lr
1287entry:
1288  %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1289  ret double %z
1290}
1291
; Max-reduce a <4 x double> vector with no fast-math flags on the intrinsic call
; and return the scalar result.
; Expected output (shared by both runs): three scalar vmaxnm.f64 -- (d1,d3) and
; (d0,d2) are combined first, then the two partial results.
1292define arm_aapcs_vfpcc double @fmax_v4f64_nofast(<4 x double> %x) {
1293; CHECK-LABEL: fmax_v4f64_nofast:
1294; CHECK:       @ %bb.0: @ %entry
1295; CHECK-NEXT:    vmaxnm.f64 d4, d1, d3
1296; CHECK-NEXT:    vmaxnm.f64 d0, d0, d2
1297; CHECK-NEXT:    vmaxnm.f64 d0, d0, d4
1298; CHECK-NEXT:    bx lr
1299entry:
1300  %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1301  ret double %z
1302}
1303
; Max-reduce a <2 x float> vector (`fast` call) and combine the result with the
; scalar %y using a `fast` ordered greater-than compare plus select.
; Expected output (shared by both runs): one vmaxnm.f32 for the reduction and a
; second vmaxnm.f32 with s4 for the compare+select accumulate step.
1304define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
1305; CHECK-LABEL: fmax_v2f32_acc:
1306; CHECK:       @ %bb.0: @ %entry
1307; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
1308; CHECK-NEXT:    vmaxnm.f32 s0, s4, s0
1309; CHECK-NEXT:    bx lr
1310entry:
1311  %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1312  %c = fcmp fast ogt float %y, %z
1313  %r = select i1 %c, float %y, float %z
1314  ret float %r
1315}
1316
; Max-reduce a <4 x float> vector (`fast` call) and combine the result with the
; scalar %y using a `fast` ordered greater-than compare plus select.
; Expected output: three scalar vmaxnm.f32 for the reduction (pairwise in the
; +mve.fp run, in-order chain in the +mve,+fullfp16 run), then a final
; vmaxnm.f32 with s4 for the accumulate step.
1317define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
1318; CHECK-FP-LABEL: fmax_v4f32_acc:
1319; CHECK-FP:       @ %bb.0: @ %entry
1320; CHECK-FP-NEXT:    vmaxnm.f32 s6, s2, s3
1321; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
1322; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s6
1323; CHECK-FP-NEXT:    vmaxnm.f32 s0, s4, s0
1324; CHECK-FP-NEXT:    bx lr
1325;
1326; CHECK-NOFP-LABEL: fmax_v4f32_acc:
1327; CHECK-NOFP:       @ %bb.0: @ %entry
1328; CHECK-NOFP-NEXT:    vmaxnm.f32 s6, s0, s1
1329; CHECK-NOFP-NEXT:    vmaxnm.f32 s6, s6, s2
1330; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s6, s3
1331; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s4, s0
1332; CHECK-NOFP-NEXT:    bx lr
1333entry:
1334  %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1335  %c = fcmp fast ogt float %y, %z
1336  %r = select i1 %c, float %y, float %z
1337  ret float %r
1338}
1339
; Max-reduce an <8 x float> vector (`fast` call, input in q0/q1) and combine the
; result with the scalar %y using a `fast` ordered greater-than compare plus
; select.
; Expected output: the +mve.fp run uses a vector vmaxnm.f32 of q0/q1 followed by
; three scalar vmaxnm.f32; the +mve,+fullfp16 run combines matching lanes with
; four vcmp.f32 / vmrs / vselgt.f32 groups followed by three scalar vmaxnm.f32.
; Both runs end with a vmaxnm.f32 with s8 for the accumulate step.
1340define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
1341; CHECK-FP-LABEL: fmax_v8f32_acc:
1342; CHECK-FP:       @ %bb.0: @ %entry
1343; CHECK-FP-NEXT:    vmaxnm.f32 q0, q0, q1
1344; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
1345; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
1346; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
1347; CHECK-FP-NEXT:    vmaxnm.f32 s0, s8, s0
1348; CHECK-FP-NEXT:    bx lr
1349;
1350; CHECK-NOFP-LABEL: fmax_v8f32_acc:
1351; CHECK-NOFP:       @ %bb.0: @ %entry
1352; CHECK-NOFP-NEXT:    vcmp.f32 s1, s5
1353; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1354; CHECK-NOFP-NEXT:    vcmp.f32 s0, s4
1355; CHECK-NOFP-NEXT:    vselgt.f32 s10, s1, s5
1356; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1357; CHECK-NOFP-NEXT:    vcmp.f32 s2, s6
1358; CHECK-NOFP-NEXT:    vselgt.f32 s12, s0, s4
1359; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1360; CHECK-NOFP-NEXT:    vcmp.f32 s3, s7
1361; CHECK-NOFP-NEXT:    vselgt.f32 s14, s2, s6
1362; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1363; CHECK-NOFP-NEXT:    vselgt.f32 s0, s3, s7
1364; CHECK-NOFP-NEXT:    vmaxnm.f32 s2, s12, s10
1365; CHECK-NOFP-NEXT:    vmaxnm.f32 s2, s2, s14
1366; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s2, s0
1367; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s8, s0
1368; CHECK-NOFP-NEXT:    bx lr
1369entry:
1370  %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1371  %c = fcmp fast ogt float %y, %z
1372  %r = select i1 %c, float %y, float %z
1373  ret float %r
1374}
1375
; Max-reduce a <2 x half> vector (`fast` call) and fold the result into the half
; stored at %yy using a `fast` ordered greater-than compare plus select; the
; selected value is stored back to %yy.
; Expected output (shared by both runs): vmovx.f16 + vmaxnm.f16 for the two
; lanes, a vldr.16 of the accumulator, a second vmaxnm.f16 for the accumulate
; step, and a vstr.16 of the result.
1376define arm_aapcs_vfpcc void @fmax_v2f16_acc(<2 x half> %x, half* %yy) {
1377; CHECK-LABEL: fmax_v2f16_acc:
1378; CHECK:       @ %bb.0: @ %entry
1379; CHECK-NEXT:    vmovx.f16 s4, s0
1380; CHECK-NEXT:    vmaxnm.f16 s0, s0, s4
1381; CHECK-NEXT:    vldr.16 s2, [r0]
1382; CHECK-NEXT:    vmaxnm.f16 s0, s2, s0
1383; CHECK-NEXT:    vstr.16 s0, [r0]
1384; CHECK-NEXT:    bx lr
1385entry:
1386  %y = load half, half* %yy
1387  %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
1388  %c = fcmp fast ogt half %y, %z
1389  %r = select i1 %c, half %y, half %z
1390  store half %r, half* %yy
1391  ret void
1392}
1393
; Max-reduce a <4 x half> vector (`fast` call) and fold the result into the half
; stored at %yy using a `fast` ordered greater-than compare plus select; the
; selected value is stored back to %yy.
; Expected output: both runs reduce the four lanes with vmovx.f16 and three
; scalar vmaxnm.f16 (pairwise in the +mve.fp run, single chain in the
; +mve,+fullfp16 run), then apply one more vmaxnm.f16 with the loaded
; accumulator and store the result with vstr.16.
1394define arm_aapcs_vfpcc void @fmax_v4f16_acc(<4 x half> %x, half* %yy) {
1395; CHECK-FP-LABEL: fmax_v4f16_acc:
1396; CHECK-FP:       @ %bb.0: @ %entry
1397; CHECK-FP-NEXT:    vmovx.f16 s4, s1
1398; CHECK-FP-NEXT:    vmovx.f16 s6, s0
1399; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
1400; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
1401; CHECK-FP-NEXT:    vldr.16 s2, [r0]
1402; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1403; CHECK-FP-NEXT:    vmaxnm.f16 s0, s2, s0
1404; CHECK-FP-NEXT:    vstr.16 s0, [r0]
1405; CHECK-FP-NEXT:    bx lr
1406;
1407; CHECK-NOFP-LABEL: fmax_v4f16_acc:
1408; CHECK-NOFP:       @ %bb.0: @ %entry
1409; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1410; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1411; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
1412; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1413; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
1414; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1415; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s2, s0
1416; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
1417; CHECK-NOFP-NEXT:    bx lr
1418entry:
1419  %y = load half, half* %yy
1420  %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1421  %c = fcmp fast ogt half %y, %z
1422  %r = select i1 %c, half %y, half %z
1423  store half %r, half* %yy
1424  ret void
1425}
1426
; Max-reduce an <8 x half> vector (`fast` call) and fold the result into the
; half stored at %yy using a `fast` ordered greater-than compare plus select;
; the selected value is stored back to %yy.
; Expected output: the +mve.fp run uses vrev32.16 plus a vector vmaxnm.f16 and
; then three scalar vmaxnm.f16; the +mve,+fullfp16 run is a chain of scalar
; vmovx.f16/vmaxnm.f16. Both runs finish with one vmaxnm.f16 against the loaded
; accumulator and a vstr.16 of the result.
1427define arm_aapcs_vfpcc void @fmax_v8f16_acc(<8 x half> %x, half* %yy) {
1428; CHECK-FP-LABEL: fmax_v8f16_acc:
1429; CHECK-FP:       @ %bb.0: @ %entry
1430; CHECK-FP-NEXT:    vrev32.16 q1, q0
1431; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1432; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1433; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1434; CHECK-FP-NEXT:    vldr.16 s2, [r0]
1435; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1436; CHECK-FP-NEXT:    vmaxnm.f16 s0, s2, s0
1437; CHECK-FP-NEXT:    vstr.16 s0, [r0]
1438; CHECK-FP-NEXT:    bx lr
1439;
1440; CHECK-NOFP-LABEL: fmax_v8f16_acc:
1441; CHECK-NOFP:       @ %bb.0: @ %entry
1442; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1443; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
1444; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1445; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1446; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1447; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1448; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
1449; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
1450; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
1451; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1452; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
1453; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1454; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s2, s0
1455; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
1456; CHECK-NOFP-NEXT:    bx lr
1457entry:
1458  %y = load half, half* %yy
1459  %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1460  %c = fcmp fast ogt half %y, %z
1461  %r = select i1 %c, half %y, half %z
1462  store half %r, half* %yy
1463  ret void
1464}
1465
; Max-reduce a <16 x half> vector (`fast` call, input in q0/q1) and fold the
; result into the half stored at %yy using a `fast` ordered greater-than compare
; plus select; the selected value is stored back to %yy.
; Expected output: the +mve.fp run uses two vector vmaxnm.f16 (with a vrev32.16
; in between) and then three scalar vmaxnm.f16. The +mve,+fullfp16 run combines
; matching lanes of q0/q1 with vcmp.f16 / vmrs / vselgt.f16 groups interleaved
; with a scalar vmaxnm.f16 accumulation chain in s8. Both runs finish with one
; vmaxnm.f16 against the loaded accumulator and a vstr.16 of the result.
1466define arm_aapcs_vfpcc void @fmax_v16f16_acc(<16 x half> %x, half* %yy) {
1467; CHECK-FP-LABEL: fmax_v16f16_acc:
1468; CHECK-FP:       @ %bb.0: @ %entry
1469; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1470; CHECK-FP-NEXT:    vrev32.16 q1, q0
1471; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1472; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1473; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1474; CHECK-FP-NEXT:    vldr.16 s2, [r0]
1475; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1476; CHECK-FP-NEXT:    vmaxnm.f16 s0, s2, s0
1477; CHECK-FP-NEXT:    vstr.16 s0, [r0]
1478; CHECK-FP-NEXT:    bx lr
1479;
1480; CHECK-NOFP-LABEL: fmax_v16f16_acc:
1481; CHECK-NOFP:       @ %bb.0: @ %entry
1482; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
1483; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
1484; CHECK-NOFP-NEXT:    vcmp.f16 s10, s8
1485; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
1486; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1487; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
1488; CHECK-NOFP-NEXT:    vselgt.f16 s8, s10, s8
1489; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1490; CHECK-NOFP-NEXT:    vcmp.f16 s1, s5
1491; CHECK-NOFP-NEXT:    vselgt.f16 s10, s0, s4
1492; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1493; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
1494; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
1495; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1496; CHECK-NOFP-NEXT:    vselgt.f16 s10, s1, s5
1497; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1498; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
1499; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
1500; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1501; CHECK-NOFP-NEXT:    vcmp.f16 s2, s6
1502; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
1503; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1504; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1505; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
1506; CHECK-NOFP-NEXT:    vselgt.f16 s10, s2, s6
1507; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
1508; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1509; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
1510; CHECK-NOFP-NEXT:    vcmp.f16 s12, s10
1511; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1512; CHECK-NOFP-NEXT:    vcmp.f16 s3, s7
1513; CHECK-NOFP-NEXT:    vselgt.f16 s10, s12, s10
1514; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1515; CHECK-NOFP-NEXT:    vcmp.f16 s0, s4
1516; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1517; CHECK-NOFP-NEXT:    vselgt.f16 s10, s3, s7
1518; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1519; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1520; CHECK-NOFP-NEXT:    vselgt.f16 s0, s0, s4
1521; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
1522; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s2, s0
1523; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
1524; CHECK-NOFP-NEXT:    bx lr
1525entry:
1526  %y = load half, half* %yy
1527  %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1528  %c = fcmp fast ogt half %y, %z
1529  %r = select i1 %c, half %y, half %z
1530  store half %r, half* %yy
1531  ret void
1532}
1533
; Max-reduce a single-element <1 x double> vector (`fast` call) and combine the
; result with the scalar %y using a `fast` ordered greater-than compare plus
; select.
; Expected output (shared by both runs): a single vmaxnm.f64 of d1 and d0 -- the
; one-lane reduction emits nothing, only the accumulate step remains.
1534define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
1535; CHECK-LABEL: fmax_v1f64_acc:
1536; CHECK:       @ %bb.0: @ %entry
1537; CHECK-NEXT:    vmaxnm.f64 d0, d1, d0
1538; CHECK-NEXT:    bx lr
1539entry:
1540  %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1541  %c = fcmp fast ogt double %y, %z
1542  %r = select i1 %c, double %y, double %z
1543  ret double %r
1544}
1545
; Max-reduce a <2 x double> vector (`fast` call) and combine the result with the
; scalar %y using a `fast` ordered greater-than compare plus select.
; Expected output (shared by both runs): one vmaxnm.f64 for the two lanes and a
; second vmaxnm.f64 with d2 for the accumulate step.
1546define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
1547; CHECK-LABEL: fmax_v2f64_acc:
1548; CHECK:       @ %bb.0: @ %entry
1549; CHECK-NEXT:    vmaxnm.f64 d0, d0, d1
1550; CHECK-NEXT:    vmaxnm.f64 d0, d2, d0
1551; CHECK-NEXT:    bx lr
1552entry:
1553  %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1554  %c = fcmp fast ogt double %y, %z
1555  %r = select i1 %c, double %y, double %z
1556  ret double %r
1557}
1558
; Max-reduce a <4 x double> vector (`fast` call) and combine the result with the
; scalar %y using a `fast` ordered greater-than compare plus select.
; Expected output (shared by both runs): two vcmp.f64 / vmrs / vselgt.f64 groups
; combine the two halves of the input lane-wise, one vmaxnm.f64 reduces the
; survivors, and a final vmaxnm.f64 with d4 performs the accumulate step.
1559define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
1560; CHECK-LABEL: fmax_v4f64_acc:
1561; CHECK:       @ %bb.0: @ %entry
1562; CHECK-NEXT:    vcmp.f64 d1, d3
1563; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1564; CHECK-NEXT:    vcmp.f64 d0, d2
1565; CHECK-NEXT:    vselgt.f64 d5, d1, d3
1566; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1567; CHECK-NEXT:    vselgt.f64 d0, d0, d2
1568; CHECK-NEXT:    vmaxnm.f64 d0, d0, d5
1569; CHECK-NEXT:    vmaxnm.f64 d0, d4, d0
1570; CHECK-NEXT:    bx lr
1571entry:
1572  %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1573  %c = fcmp fast ogt double %y, %z
1574  %r = select i1 %c, double %y, double %z
1575  ret double %r
1576}
1577
; Max-reduce a <2 x float> vector (no fast-math flags) and combine the result
; with the scalar %y using an ordered greater-than compare plus select.
; Expected output (shared by both runs): one vmaxnm.f32 for the reduction; the
; accumulate step stays as vcmp.f32 / vmrs / vselgt.f32 rather than a vmaxnm.
1578define arm_aapcs_vfpcc float @fmax_v2f32_acc_nofast(<2 x float> %x, float %y) {
1579; CHECK-LABEL: fmax_v2f32_acc_nofast:
1580; CHECK:       @ %bb.0: @ %entry
1581; CHECK-NEXT:    vmaxnm.f32 s0, s0, s1
1582; CHECK-NEXT:    vcmp.f32 s4, s0
1583; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1584; CHECK-NEXT:    vselgt.f32 s0, s4, s0
1585; CHECK-NEXT:    bx lr
1586entry:
1587  %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1588  %c = fcmp ogt float %y, %z
1589  %r = select i1 %c, float %y, float %z
1590  ret float %r
1591}
1592
; Max-reduce a <4 x float> vector (no fast-math flags) and combine the result
; with the scalar %y using an ordered greater-than compare plus select.
; Expected output: three scalar vmaxnm.f32 for the reduction (pairwise in the
; +mve.fp run, in-order chain in the +mve,+fullfp16 run); the accumulate step
; stays as vcmp.f32 / vmrs / vselgt.f32 in both runs.
1593define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) {
1594; CHECK-FP-LABEL: fmax_v4f32_acc_nofast:
1595; CHECK-FP:       @ %bb.0: @ %entry
1596; CHECK-FP-NEXT:    vmaxnm.f32 s6, s2, s3
1597; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
1598; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s6
1599; CHECK-FP-NEXT:    vcmp.f32 s4, s0
1600; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
1601; CHECK-FP-NEXT:    vselgt.f32 s0, s4, s0
1602; CHECK-FP-NEXT:    bx lr
1603;
1604; CHECK-NOFP-LABEL: fmax_v4f32_acc_nofast:
1605; CHECK-NOFP:       @ %bb.0: @ %entry
1606; CHECK-NOFP-NEXT:    vmaxnm.f32 s6, s0, s1
1607; CHECK-NOFP-NEXT:    vmaxnm.f32 s6, s6, s2
1608; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s6, s3
1609; CHECK-NOFP-NEXT:    vcmp.f32 s4, s0
1610; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1611; CHECK-NOFP-NEXT:    vselgt.f32 s0, s4, s0
1612; CHECK-NOFP-NEXT:    bx lr
1613entry:
1614  %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1615  %c = fcmp ogt float %y, %z
1616  %r = select i1 %c, float %y, float %z
1617  ret float %r
1618}
1619
; Max-reduce an <8 x float> vector (no fast-math flags, input in q0/q1) and
; combine the result with the scalar %y using an ordered greater-than compare
; plus select.
; Expected output: the +mve.fp run uses a vector vmaxnm.f32 of q0/q1 followed by
; three scalar vmaxnm.f32; the +mve,+fullfp16 run uses seven scalar vmaxnm.f32.
; In both runs the accumulate step stays as vcmp.f32 / vmrs / vselgt.f32
; against s8.
1620define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) {
1621; CHECK-FP-LABEL: fmax_v8f32_acc_nofast:
1622; CHECK-FP:       @ %bb.0: @ %entry
1623; CHECK-FP-NEXT:    vmaxnm.f32 q0, q0, q1
1624; CHECK-FP-NEXT:    vmaxnm.f32 s4, s2, s3
1625; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s1
1626; CHECK-FP-NEXT:    vmaxnm.f32 s0, s0, s4
1627; CHECK-FP-NEXT:    vcmp.f32 s8, s0
1628; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
1629; CHECK-FP-NEXT:    vselgt.f32 s0, s8, s0
1630; CHECK-FP-NEXT:    bx lr
1631;
1632; CHECK-NOFP-LABEL: fmax_v8f32_acc_nofast:
1633; CHECK-NOFP:       @ %bb.0: @ %entry
1634; CHECK-NOFP-NEXT:    vmaxnm.f32 s12, s0, s4
1635; CHECK-NOFP-NEXT:    vmaxnm.f32 s10, s1, s5
1636; CHECK-NOFP-NEXT:    vmaxnm.f32 s10, s12, s10
1637; CHECK-NOFP-NEXT:    vmaxnm.f32 s12, s2, s6
1638; CHECK-NOFP-NEXT:    vmaxnm.f32 s10, s10, s12
1639; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s3, s7
1640; CHECK-NOFP-NEXT:    vmaxnm.f32 s0, s10, s0
1641; CHECK-NOFP-NEXT:    vcmp.f32 s8, s0
1642; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1643; CHECK-NOFP-NEXT:    vselgt.f32 s0, s8, s0
1644; CHECK-NOFP-NEXT:    bx lr
1645entry:
1646  %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1647  %c = fcmp ogt float %y, %z
1648  %r = select i1 %c, float %y, float %z
1649  ret float %r
1650}
1651
; Max-reduce a <4 x half> vector (no fast-math flags) and fold the result into
; the half stored at %yy using an ordered greater-than compare plus select; the
; selected value is stored back to %yy.
; Expected output: both runs reduce the four lanes with vmovx.f16 and three
; scalar vmaxnm.f16 (pairwise in the +mve.fp run, single chain in the
; +mve,+fullfp16 run). The accumulate step stays as vcmp.f16 / vmrs /
; vselgt.f16 against the loaded accumulator, followed by vstr.16.
1652define arm_aapcs_vfpcc void @fmax_v4f16_acc_nofast(<4 x half> %x, half* %yy) {
1653; CHECK-FP-LABEL: fmax_v4f16_acc_nofast:
1654; CHECK-FP:       @ %bb.0: @ %entry
1655; CHECK-FP-NEXT:    vmovx.f16 s4, s1
1656; CHECK-FP-NEXT:    vmovx.f16 s6, s0
1657; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s6
1658; CHECK-FP-NEXT:    vmaxnm.f16 s4, s1, s4
1659; CHECK-FP-NEXT:    vldr.16 s2, [r0]
1660; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1661; CHECK-FP-NEXT:    vcmp.f16 s2, s0
1662; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
1663; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
1664; CHECK-FP-NEXT:    vstr.16 s0, [r0]
1665; CHECK-FP-NEXT:    bx lr
1666;
1667; CHECK-NOFP-LABEL: fmax_v4f16_acc_nofast:
1668; CHECK-NOFP:       @ %bb.0: @ %entry
1669; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1670; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1671; CHECK-NOFP-NEXT:    vmovx.f16 s0, s1
1672; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1673; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
1674; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1675; CHECK-NOFP-NEXT:    vcmp.f16 s2, s0
1676; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1677; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
1678; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
1679; CHECK-NOFP-NEXT:    bx lr
1680entry:
1681  %y = load half, half* %yy
1682  %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1683  %c = fcmp ogt half %y, %z
1684  %r = select i1 %c, half %y, half %z
1685  store half %r, half* %yy
1686  ret void
1687}
1688
; fmax reduction of <8 x half> %x with no fast-math flags on the call. The
; accumulator is loaded from %yy, replaced by the larger of itself and the
; reduced value (via `fcmp ogt` + select), and stored back to %yy.
; Expected code for the +mve.fp run: vrev32.16 plus one vector vmaxnm.f16,
; then three scalar vmaxnm.f16 before the vcmp/vmrs/vselgt tail.
; Expected code for the +mve,+fullfp16 run: a fully scalar chain of
; vmovx.f16 / vmaxnm.f16 before the same tail.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
1689define arm_aapcs_vfpcc void @fmax_v8f16_acc_nofast(<8 x half> %x, half* %yy) {
1690; CHECK-FP-LABEL: fmax_v8f16_acc_nofast:
1691; CHECK-FP:       @ %bb.0: @ %entry
1692; CHECK-FP-NEXT:    vrev32.16 q1, q0
1693; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1694; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1695; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1696; CHECK-FP-NEXT:    vldr.16 s2, [r0]
1697; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1698; CHECK-FP-NEXT:    vcmp.f16 s2, s0
1699; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
1700; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
1701; CHECK-FP-NEXT:    vstr.16 s0, [r0]
1702; CHECK-FP-NEXT:    bx lr
1703;
1704; CHECK-NOFP-LABEL: fmax_v8f16_acc_nofast:
1705; CHECK-NOFP:       @ %bb.0: @ %entry
1706; CHECK-NOFP-NEXT:    vmovx.f16 s4, s0
1707; CHECK-NOFP-NEXT:    vmovx.f16 s6, s1
1708; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s0, s4
1709; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1710; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s1
1711; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1712; CHECK-NOFP-NEXT:    vmovx.f16 s6, s2
1713; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s2
1714; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
1715; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s6
1716; CHECK-NOFP-NEXT:    vmaxnm.f16 s4, s4, s3
1717; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s4, s0
1718; CHECK-NOFP-NEXT:    vcmp.f16 s2, s0
1719; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1720; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
1721; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
1722; CHECK-NOFP-NEXT:    bx lr
1723entry:
1724  %y = load half, half* %yy
1725  %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1726  %c = fcmp ogt half %y, %z
1727  %r = select i1 %c, half %y, half %z
1728  store half %r, half* %yy
1729  ret void
1730}
1731
; fmax reduction of <16 x half> %x with no fast-math flags on the call. The
; accumulator is loaded from %yy, replaced by the larger of itself and the
; reduced value (via `fcmp ogt` + select), and stored back to %yy.
; Expected code for the +mve.fp run: a vector vmaxnm.f16 combining q0 and q1,
; then vrev32.16 plus a second vector vmaxnm.f16, then three scalar
; vmaxnm.f16 before the vcmp/vmrs/vselgt tail.
; Expected code for the +mve,+fullfp16 run: a fully scalar chain of
; vmovx.f16 / vmaxnm.f16 before the same tail.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
1732define arm_aapcs_vfpcc void @fmax_v16f16_acc_nofast(<16 x half> %x, half* %yy) {
1733; CHECK-FP-LABEL: fmax_v16f16_acc_nofast:
1734; CHECK-FP:       @ %bb.0: @ %entry
1735; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1736; CHECK-FP-NEXT:    vrev32.16 q1, q0
1737; CHECK-FP-NEXT:    vmaxnm.f16 q0, q0, q1
1738; CHECK-FP-NEXT:    vmaxnm.f16 s4, s2, s3
1739; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s1
1740; CHECK-FP-NEXT:    vldr.16 s2, [r0]
1741; CHECK-FP-NEXT:    vmaxnm.f16 s0, s0, s4
1742; CHECK-FP-NEXT:    vcmp.f16 s2, s0
1743; CHECK-FP-NEXT:    vmrs APSR_nzcv, fpscr
1744; CHECK-FP-NEXT:    vselgt.f16 s0, s2, s0
1745; CHECK-FP-NEXT:    vstr.16 s0, [r0]
1746; CHECK-FP-NEXT:    bx lr
1747;
1748; CHECK-NOFP-LABEL: fmax_v16f16_acc_nofast:
1749; CHECK-NOFP:       @ %bb.0: @ %entry
1750; CHECK-NOFP-NEXT:    vmovx.f16 s8, s4
1751; CHECK-NOFP-NEXT:    vmovx.f16 s10, s0
1752; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
1753; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s0, s4
1754; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s10, s8
1755; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s1, s5
1756; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1757; CHECK-NOFP-NEXT:    vmovx.f16 s10, s5
1758; CHECK-NOFP-NEXT:    vmovx.f16 s12, s1
1759; CHECK-NOFP-NEXT:    vmovx.f16 s4, s7
1760; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
1761; CHECK-NOFP-NEXT:    vmovx.f16 s12, s2
1762; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1763; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s2, s6
1764; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1765; CHECK-NOFP-NEXT:    vmovx.f16 s10, s6
1766; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s12, s10
1767; CHECK-NOFP-NEXT:    vmovx.f16 s0, s3
1768; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1769; CHECK-NOFP-NEXT:    vmaxnm.f16 s10, s3, s7
1770; CHECK-NOFP-NEXT:    vmaxnm.f16 s8, s8, s10
1771; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s0, s4
1772; CHECK-NOFP-NEXT:    vldr.16 s2, [r0]
1773; CHECK-NOFP-NEXT:    vmaxnm.f16 s0, s8, s0
1774; CHECK-NOFP-NEXT:    vcmp.f16 s2, s0
1775; CHECK-NOFP-NEXT:    vmrs APSR_nzcv, fpscr
1776; CHECK-NOFP-NEXT:    vselgt.f16 s0, s2, s0
1777; CHECK-NOFP-NEXT:    vstr.16 s0, [r0]
1778; CHECK-NOFP-NEXT:    bx lr
1779entry:
1780  %y = load half, half* %yy
1781  %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1782  %c = fcmp ogt half %y, %z
1783  %r = select i1 %c, half %y, half %z
1784  store half %r, half* %yy
1785  ret void
1786}
1787
; Single-element case: fmax reduction of <1 x double> %x with no fast-math
; flags, then the result is %y when `fcmp ogt %y, %z` is true, otherwise %z.
; Both run configurations share one set of assertions: no vmaxnm is expected,
; only the vcmp/vmrs/vselgt sequence between d1 and d0.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
1788define arm_aapcs_vfpcc double @fmax_v1f64_acc_nofast(<1 x double> %x, double %y) {
1789; CHECK-LABEL: fmax_v1f64_acc_nofast:
1790; CHECK:       @ %bb.0: @ %entry
1791; CHECK-NEXT:    vcmp.f64 d1, d0
1792; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1793; CHECK-NEXT:    vselgt.f64 d0, d1, d0
1794; CHECK-NEXT:    bx lr
1795entry:
1796  %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1797  %c = fcmp ogt double %y, %z
1798  %r = select i1 %c, double %y, double %z
1799  ret double %r
1800}
1801
; fmax reduction of <2 x double> %x with no fast-math flags, then the result
; is %y when `fcmp ogt %y, %z` is true, otherwise the reduced value %z.
; Both run configurations share one set of assertions: a single scalar
; vmaxnm.f64 on d0/d1, then vcmp/vmrs/vselgt against the accumulator in d2.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
1802define arm_aapcs_vfpcc double @fmax_v2f64_acc_nofast(<2 x double> %x, double %y) {
1803; CHECK-LABEL: fmax_v2f64_acc_nofast:
1804; CHECK:       @ %bb.0: @ %entry
1805; CHECK-NEXT:    vmaxnm.f64 d0, d0, d1
1806; CHECK-NEXT:    vcmp.f64 d2, d0
1807; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1808; CHECK-NEXT:    vselgt.f64 d0, d2, d0
1809; CHECK-NEXT:    bx lr
1810entry:
1811  %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1812  %c = fcmp ogt double %y, %z
1813  %r = select i1 %c, double %y, double %z
1814  ret double %r
1815}
1816
; fmax reduction of <4 x double> %x with no fast-math flags, then the result
; is %y when `fcmp ogt %y, %z` is true, otherwise the reduced value %z.
; Both run configurations share one set of assertions: three scalar
; vmaxnm.f64, then vcmp/vmrs/vselgt against the accumulator in d4.
; The assertion lines are autogenerated (utils/update_llc_test_checks.py);
; regenerate them instead of editing by hand.
1817define arm_aapcs_vfpcc double @fmax_v4f64_acc_nofast(<4 x double> %x, double %y) {
1818; CHECK-LABEL: fmax_v4f64_acc_nofast:
1819; CHECK:       @ %bb.0: @ %entry
1820; CHECK-NEXT:    vmaxnm.f64 d5, d1, d3
1821; CHECK-NEXT:    vmaxnm.f64 d0, d0, d2
1822; CHECK-NEXT:    vmaxnm.f64 d0, d0, d5
1823; CHECK-NEXT:    vcmp.f64 d4, d0
1824; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
1825; CHECK-NEXT:    vselgt.f64 d0, d4, d0
1826; CHECK-NEXT:    bx lr
1827entry:
1828  %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1829  %c = fcmp ogt double %y, %z
1830  %r = select i1 %c, double %y, double %z
1831  ret double %r
1832}
1833
; Declarations of the llvm.vector.reduce.fmax / llvm.vector.reduce.fmin
; intrinsics, one per element type (double, float, half) and vector width,
; grouped by element type.
1834declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
1835declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
1836declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
1837declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
1838declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
1839declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
1840declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
1841declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
1842declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
1843declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
1844declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
1845declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
1846declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
1847declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
1848declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
1849declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
1850declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
1851declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
1852declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
1853declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
1854