; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512

;
; vXf32
;

; A one-element reduction is just that element, so every configuration is
; expected to return the incoming value untouched. The checks are spelled with
; the prefixes the RUN lines actually pass to FileCheck.
define float @test_v1f32(<1 x float> %a0) {
; SSE-LABEL: test_v1f32:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v1f32:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v1f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0)
  ret float %1
}

; Two float lanes: lane 1 is shuffled down to lane 0 and folded in with a
; single scalar min.
define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
  ret float %1
}

; Three float lanes (a non-power-of-two width): lanes 1 and 2 are each moved
; down to lane 0 and folded in with one scalar min apiece.
define float @test_v3f32(<3 x float> %a0) {
; SSE2-LABEL: test_v3f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    minss %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v3f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE41-NEXT:    minss %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v3f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v3f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
  ret float %1
}

; Four float lanes in one 128-bit register: lanes 1..3 are shuffled down and
; folded into lane 0 with three scalar mins.
define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT:    movaps %xmm0, %xmm3
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
; SSE2-NEXT:    minss %xmm3, %xmm0
; SSE2-NEXT:    minss %xmm2, %xmm0
; SSE2-NEXT:    minss %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE41-NEXT:    movaps %xmm0, %xmm2
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT:    movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    minss %xmm3, %xmm0
; SSE41-NEXT:    minss %xmm2, %xmm0
; SSE41-NEXT:    minss %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm3, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
  ret float %1
}

; Eight float lanes: the SSE expectations first fold the two 128-bit halves
; with one packed min, while the AVX and AVX512 expectations extract the upper
; half and reduce all eight lanes with a chain of seven scalar mins.
define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    minss %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm0, %xmm2
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT:    minss %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX-NEXT:    vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm6, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm5, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm4, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; AVX512-NEXT:    vpermilps {{.*#+}} xmm5 = xmm0[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm7, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm6, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm5, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm4, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm3, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
  ret float %1
}

; Sixteen float lanes: the SSE and AVX expectations fold whole registers with
; packed mins before the scalar tail, whereas the AVX512 expectation is a fully
; scalar chain over all four 128-bit quarters of the zmm input.
define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm3, %xmm1
; SSE2-NEXT:    minps %xmm2, %xmm0
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    movaps %xmm0, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT:    minss %xmm2, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    minss %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm3, %xmm1
; SSE41-NEXT:    minps %xmm2, %xmm0
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm0, %xmm2
; SSE41-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT:    minss %xmm2, %xmm1
; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-NEXT:    minss %xmm0, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX-NEXT:    vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT:    vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vminss %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT:    vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT:    vpermilps {{.*#+}} xmm8 = xmm1[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm9 = xmm1[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm10 = xmm1[1,1,3,3]
; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm5
; AVX512-NEXT:    vpermilps {{.*#+}} xmm11 = xmm5[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm12 = xmm5[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm13 = xmm5[1,1,3,3]
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX512-NEXT:    vpermilps {{.*#+}} xmm14 = xmm3[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm15 = xmm3[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3]
; AVX512-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminss %xmm6, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm4, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm3, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm7, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm15, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm14, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm5, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm13, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm12, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm11, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm10, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm9, %xmm0, %xmm0
; AVX512-NEXT:    vminss %xmm8, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

; Two double lanes: the high lane is moved down to lane 0 and folded in with a
; single scalar min.
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
  ret double %1
}

; Four double lanes: the SSE expectation folds the two 128-bit halves with one
; packed min before the scalar step; the AVX and AVX512 expectations extract
; the upper half and use three scalar mins.
define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
  ret double %1
}

; Eight double lanes: the SSE and AVX expectations fold registers with packed
; mins before the scalar tail; the AVX512 expectation extracts each 128-bit
; quarter of the zmm input and reduces with seven scalar mins.
define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm3
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm5[1,0]
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm7, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm5, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm6, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm3, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm4, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
  ret double %1
}

; Sixteen double lanes: every configuration first folds whole registers with
; packed mins (seven for SSE, three for AVX, one zmm-wide for AVX512) and then
; finishes the remaining lanes with scalar mins.
define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm7, %xmm3
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm6, %xmm2
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm1, %xmm0, %xmm1
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT:    vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT:    vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT:    vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT:    vminsd %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT:    vminsd %xmm0, %xmm1, %xmm1
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT:    vminsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
  ret double %1
}

; Two half lanes. The SSE and AVX expectations widen each lane to float through
; the __gnu_h2f_ieee libcall and narrow the result with __gnu_f2h_ieee; the
; AVX512 expectation uses vcvtph2ps / vcvtps2ph inline instead.
; NOTE(review): although the call carries nnan, every expectation still wraps
; the min in an unordered-compare select - presumably the flag is not applied
; once the half operation is promoted to float; confirm before relying on it.
define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    subq $16, %rsp
; SSE-NEXT:    movl %edi, %ebx
; SSE-NEXT:    movzwl %si, %edi
; SSE-NEXT:    callq __gnu_h2f_ieee
; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT:    movzwl %bx, %edi
; SSE-NEXT:    callq __gnu_h2f_ieee
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    cmpunordss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    movaps (%rsp), %xmm3 # 16-byte Reload
; SSE-NEXT:    andps %xmm3, %xmm2
; SSE-NEXT:    minss %xmm0, %xmm3
; SSE-NEXT:    andnps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    callq __gnu_f2h_ieee
; SSE-NEXT:    addq $16, %rsp
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f16:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    subq $16, %rsp
; AVX-NEXT:    movl %esi, %ebx
; AVX-NEXT:    movzwl %di, %edi
; AVX-NEXT:    callq __gnu_h2f_ieee
; AVX-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX-NEXT:    movzwl %bx, %edi
; AVX-NEXT:    callq __gnu_h2f_ieee
; AVX-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
; AVX-NEXT:    # xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vminss %xmm2, %xmm0, %xmm1
; AVX-NEXT:    vcmpunordss %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    callq __gnu_f2h_ieee
; AVX-NEXT:    addq $16, %rsp
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzwl %di, %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT:    movzwl %si, %eax
; AVX512-NEXT:    vmovd %eax, %xmm1
; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT:    vminss %xmm0, %xmm1, %xmm2
; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
; AVX512-NEXT:    vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
  %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0)
  ret half %1
}

; Declarations of the fmin reduction intrinsics called by the tests in this
; file, grouped by element type (float, double, half).
declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)

declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
