1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2     | FileCheck %s --check-prefix=X86-SSE2
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2   | FileCheck %s --check-prefix=X86-SSE42
4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx      | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2     | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2   | FileCheck %s --check-prefix=X64-SSE2
7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx    | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2   | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512
11
12;
13; 128-bit Vectors
14;
15
; test_reduce_v2i64: horizontal signed minimum of a <2 x i64>, written as one
; explicit shufflevector + icmp slt + select step followed by an extract of
; lane 0. The assertions below expect:
;   * SSE2 configurations: the 64-bit compare assembled from pcmpgtd/pcmpeqd on
;     copies XORed with a constant, then a pand/pandn/por select.
;   * SSE4.2 and AVX/AVX2 configurations: pcmpgtq feeding blendvpd.
;   * AVX-512 configuration: a single vpminsq.
; On the i686 triples the i64 result is returned split across eax (low half)
; and edx (high half); on x86_64 it is returned in rax.
16define i64 @test_reduce_v2i64(<2 x i64> %a0) {
17; X86-SSE2-LABEL: test_reduce_v2i64:
18; X86-SSE2:       ## %bb.0:
19; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
20; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
21; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
22; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
23; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
24; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
25; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
26; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
27; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
28; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
29; X86-SSE2-NEXT:    pand %xmm5, %xmm2
30; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
31; X86-SSE2-NEXT:    por %xmm2, %xmm3
32; X86-SSE2-NEXT:    pand %xmm3, %xmm0
33; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
34; X86-SSE2-NEXT:    por %xmm0, %xmm3
35; X86-SSE2-NEXT:    movd %xmm3, %eax
36; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
37; X86-SSE2-NEXT:    movd %xmm0, %edx
38; X86-SSE2-NEXT:    retl
39;
40; X86-SSE42-LABEL: test_reduce_v2i64:
41; X86-SSE42:       ## %bb.0:
42; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
43; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
44; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
45; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
46; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
47; X86-SSE42-NEXT:    movd %xmm2, %eax
48; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
49; X86-SSE42-NEXT:    retl
50;
51; X86-AVX-LABEL: test_reduce_v2i64:
52; X86-AVX:       ## %bb.0:
53; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
54; X86-AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
55; X86-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
56; X86-AVX-NEXT:    vmovd %xmm0, %eax
57; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
58; X86-AVX-NEXT:    retl
59;
60; X64-SSE2-LABEL: test_reduce_v2i64:
61; X64-SSE2:       ## %bb.0:
62; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
63; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
64; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
65; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
66; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
67; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
68; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
69; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
70; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
71; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
72; X64-SSE2-NEXT:    pand %xmm5, %xmm2
73; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
74; X64-SSE2-NEXT:    por %xmm2, %xmm3
75; X64-SSE2-NEXT:    pand %xmm3, %xmm0
76; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
77; X64-SSE2-NEXT:    por %xmm0, %xmm3
78; X64-SSE2-NEXT:    movq %xmm3, %rax
79; X64-SSE2-NEXT:    retq
80;
81; X64-SSE42-LABEL: test_reduce_v2i64:
82; X64-SSE42:       ## %bb.0:
83; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
84; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
85; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
86; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
87; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
88; X64-SSE42-NEXT:    movq %xmm2, %rax
89; X64-SSE42-NEXT:    retq
90;
91; X64-AVX1-LABEL: test_reduce_v2i64:
92; X64-AVX1:       ## %bb.0:
93; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
94; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
95; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
96; X64-AVX1-NEXT:    vmovq %xmm0, %rax
97; X64-AVX1-NEXT:    retq
98;
99; X64-AVX2-LABEL: test_reduce_v2i64:
100; X64-AVX2:       ## %bb.0:
101; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
102; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
103; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
104; X64-AVX2-NEXT:    vmovq %xmm0, %rax
105; X64-AVX2-NEXT:    retq
106;
107; X64-AVX512-LABEL: test_reduce_v2i64:
108; X64-AVX512:       ## %bb.0:
109; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
110; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
111; X64-AVX512-NEXT:    vmovq %xmm0, %rax
112; X64-AVX512-NEXT:    retq
  ; Bring element 1 down to lane 0 so it lines up with element 0.
113  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  ; Lane-wise signed min: keep %a0 where it is smaller, else the shuffled copy.
114  %2 = icmp slt <2 x i64> %a0, %1
115  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  ; Only lane 0 holds the reduction result.
116  %4 = extractelement <2 x i64> %3, i32 0
117  ret i64 %4
118}
119
; test_reduce_v4i32: horizontal signed minimum of a <4 x i32>, expressed as two
; halving steps (upper two elements against lower two, then element 1 against
; element 0), each a shufflevector + icmp slt + select, with the result read
; from lane 0. The assertions expect the SSE2 configurations to emulate each
; min with pcmpgtd + pand/pandn/por, and the SSE4.2 and AVX configurations to
; use pminsd / vpminsd directly.
120define i32 @test_reduce_v4i32(<4 x i32> %a0) {
121; X86-SSE2-LABEL: test_reduce_v4i32:
122; X86-SSE2:       ## %bb.0:
123; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
124; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
125; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
126; X86-SSE2-NEXT:    pand %xmm2, %xmm0
127; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
128; X86-SSE2-NEXT:    por %xmm0, %xmm2
129; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
130; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
131; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
132; X86-SSE2-NEXT:    pand %xmm1, %xmm2
133; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
134; X86-SSE2-NEXT:    por %xmm2, %xmm1
135; X86-SSE2-NEXT:    movd %xmm1, %eax
136; X86-SSE2-NEXT:    retl
137;
138; X86-SSE42-LABEL: test_reduce_v4i32:
139; X86-SSE42:       ## %bb.0:
140; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
141; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
142; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
143; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
144; X86-SSE42-NEXT:    movd %xmm0, %eax
145; X86-SSE42-NEXT:    retl
146;
147; X86-AVX-LABEL: test_reduce_v4i32:
148; X86-AVX:       ## %bb.0:
149; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
150; X86-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
151; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
152; X86-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
153; X86-AVX-NEXT:    vmovd %xmm0, %eax
154; X86-AVX-NEXT:    retl
155;
156; X64-SSE2-LABEL: test_reduce_v4i32:
157; X64-SSE2:       ## %bb.0:
158; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
159; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
160; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
161; X64-SSE2-NEXT:    pand %xmm2, %xmm0
162; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
163; X64-SSE2-NEXT:    por %xmm0, %xmm2
164; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
165; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
166; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
167; X64-SSE2-NEXT:    pand %xmm1, %xmm2
168; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
169; X64-SSE2-NEXT:    por %xmm2, %xmm1
170; X64-SSE2-NEXT:    movd %xmm1, %eax
171; X64-SSE2-NEXT:    retq
172;
173; X64-SSE42-LABEL: test_reduce_v4i32:
174; X64-SSE42:       ## %bb.0:
175; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
176; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
177; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
178; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
179; X64-SSE42-NEXT:    movd %xmm0, %eax
180; X64-SSE42-NEXT:    retq
181;
182; X64-AVX-LABEL: test_reduce_v4i32:
183; X64-AVX:       ## %bb.0:
184; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
185; X64-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
186; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
187; X64-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
188; X64-AVX-NEXT:    vmovd %xmm0, %eax
189; X64-AVX-NEXT:    retq
  ; Step 1: min of elements {0,1} against elements {2,3}.
190  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
191  %2 = icmp slt <4 x i32> %a0, %1
192  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  ; Step 2: min of the surviving element 0 against element 1.
193  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
194  %5 = icmp slt <4 x i32> %3, %4
195  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  ; Only lane 0 holds the reduction result.
196  %7 = extractelement <4 x i32> %6, i32 0
197  ret i32 %7
198}
199
; test_reduce_v8i16: horizontal signed minimum of an <8 x i16>, expressed as
; three halving steps (4, 2, then 1 element), each a shufflevector + icmp slt +
; select, with the result read from lane 0. The assertions expect the SSE2
; configurations to use three pminsw steps, and the SSE4.2 and AVX
; configurations to collapse the whole reduction into a pxor with a
; constant-pool operand, one phminposuw, and a final xorl $32768 (0x8000) on
; the scalar.
; NOTE(review): presumably the pxor constant is the per-element sign-bit mask
; that lets phminposuw stand in for a signed minimum - the constant's value is
; not visible in these assertions; confirm against the generated constant pool.
200define i16 @test_reduce_v8i16(<8 x i16> %a0) {
201; X86-SSE2-LABEL: test_reduce_v8i16:
202; X86-SSE2:       ## %bb.0:
203; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
204; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
205; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
206; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
207; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
208; X86-SSE2-NEXT:    psrld $16, %xmm1
209; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
210; X86-SSE2-NEXT:    movd %xmm1, %eax
211; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
212; X86-SSE2-NEXT:    retl
213;
214; X86-SSE42-LABEL: test_reduce_v8i16:
215; X86-SSE42:       ## %bb.0:
216; X86-SSE42-NEXT:    pxor LCPI2_0, %xmm0
217; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
218; X86-SSE42-NEXT:    movd %xmm0, %eax
219; X86-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
220; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
221; X86-SSE42-NEXT:    retl
222;
223; X86-AVX-LABEL: test_reduce_v8i16:
224; X86-AVX:       ## %bb.0:
225; X86-AVX-NEXT:    vpxor LCPI2_0, %xmm0, %xmm0
226; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
227; X86-AVX-NEXT:    vmovd %xmm0, %eax
228; X86-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
229; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
230; X86-AVX-NEXT:    retl
231;
232; X64-SSE2-LABEL: test_reduce_v8i16:
233; X64-SSE2:       ## %bb.0:
234; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
235; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
236; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
237; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
238; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
239; X64-SSE2-NEXT:    psrld $16, %xmm1
240; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
241; X64-SSE2-NEXT:    movd %xmm1, %eax
242; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
243; X64-SSE2-NEXT:    retq
244;
245; X64-SSE42-LABEL: test_reduce_v8i16:
246; X64-SSE42:       ## %bb.0:
247; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
248; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
249; X64-SSE42-NEXT:    movd %xmm0, %eax
250; X64-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
251; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
252; X64-SSE42-NEXT:    retq
253;
254; X64-AVX-LABEL: test_reduce_v8i16:
255; X64-AVX:       ## %bb.0:
256; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
257; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
258; X64-AVX-NEXT:    vmovd %xmm0, %eax
259; X64-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
260; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
261; X64-AVX-NEXT:    retq
  ; Step 1: min of elements 0..3 against elements 4..7.
262  %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
263  %2  = icmp slt <8 x i16> %a0, %1
264  %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  ; Step 2: min of elements 0..1 against elements 2..3.
265  %4  = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
266  %5  = icmp slt <8 x i16> %3, %4
267  %6  = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  ; Step 3: min of element 0 against element 1.
268  %7  = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
269  %8  = icmp slt <8 x i16> %6, %7
270  %9  = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  ; Only lane 0 holds the reduction result.
271  %10 = extractelement <8 x i16> %9, i32 0
272  ret i16 %10
273}
274
; test_reduce_v16i8: horizontal signed minimum of a <16 x i8>, expressed as
; four halving steps (8, 4, 2, then 1 element), each a shufflevector +
; icmp slt + select, with the result read from lane 0. The assertions expect
; the SSE2 configurations to emulate every step with pcmpgtb +
; pand/pandn/por, and the SSE4.2 and AVX configurations to use a pxor with a
; constant-pool operand, a psrlw $8 + pminub pairing step, one phminposuw, and
; a final xorb $-128 on the scalar.
; NOTE(review): presumably the pxor constant flips each byte's sign bit so the
; unsigned pminub/phminposuw compute a signed minimum - the constant's value is
; not visible in these assertions; confirm against the generated constant pool.
275define i8 @test_reduce_v16i8(<16 x i8> %a0) {
276; X86-SSE2-LABEL: test_reduce_v16i8:
277; X86-SSE2:       ## %bb.0:
278; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
279; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
280; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
281; X86-SSE2-NEXT:    pand %xmm2, %xmm0
282; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
283; X86-SSE2-NEXT:    por %xmm0, %xmm2
284; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
285; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
286; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
287; X86-SSE2-NEXT:    pand %xmm1, %xmm2
288; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
289; X86-SSE2-NEXT:    por %xmm2, %xmm1
290; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
291; X86-SSE2-NEXT:    psrld $16, %xmm0
292; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
293; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
294; X86-SSE2-NEXT:    pand %xmm2, %xmm1
295; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
296; X86-SSE2-NEXT:    por %xmm1, %xmm2
297; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
298; X86-SSE2-NEXT:    psrlw $8, %xmm0
299; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
300; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
301; X86-SSE2-NEXT:    pand %xmm1, %xmm2
302; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
303; X86-SSE2-NEXT:    por %xmm2, %xmm1
304; X86-SSE2-NEXT:    movd %xmm1, %eax
305; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
306; X86-SSE2-NEXT:    retl
307;
308; X86-SSE42-LABEL: test_reduce_v16i8:
309; X86-SSE42:       ## %bb.0:
310; X86-SSE42-NEXT:    pxor LCPI3_0, %xmm0
311; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
312; X86-SSE42-NEXT:    psrlw $8, %xmm1
313; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
314; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
315; X86-SSE42-NEXT:    movd %xmm0, %eax
316; X86-SSE42-NEXT:    xorb $-128, %al
317; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
318; X86-SSE42-NEXT:    retl
319;
320; X86-AVX-LABEL: test_reduce_v16i8:
321; X86-AVX:       ## %bb.0:
322; X86-AVX-NEXT:    vpxor LCPI3_0, %xmm0, %xmm0
323; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
324; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
325; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
326; X86-AVX-NEXT:    vmovd %xmm0, %eax
327; X86-AVX-NEXT:    xorb $-128, %al
328; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
329; X86-AVX-NEXT:    retl
330;
331; X64-SSE2-LABEL: test_reduce_v16i8:
332; X64-SSE2:       ## %bb.0:
333; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
334; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
335; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
336; X64-SSE2-NEXT:    pand %xmm2, %xmm0
337; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
338; X64-SSE2-NEXT:    por %xmm0, %xmm2
339; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
340; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
341; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
342; X64-SSE2-NEXT:    pand %xmm1, %xmm2
343; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
344; X64-SSE2-NEXT:    por %xmm2, %xmm1
345; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
346; X64-SSE2-NEXT:    psrld $16, %xmm0
347; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
348; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
349; X64-SSE2-NEXT:    pand %xmm2, %xmm1
350; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
351; X64-SSE2-NEXT:    por %xmm1, %xmm2
352; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
353; X64-SSE2-NEXT:    psrlw $8, %xmm0
354; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
355; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
356; X64-SSE2-NEXT:    pand %xmm1, %xmm2
357; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
358; X64-SSE2-NEXT:    por %xmm2, %xmm1
359; X64-SSE2-NEXT:    movd %xmm1, %eax
360; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
361; X64-SSE2-NEXT:    retq
362;
363; X64-SSE42-LABEL: test_reduce_v16i8:
364; X64-SSE42:       ## %bb.0:
365; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
366; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
367; X64-SSE42-NEXT:    psrlw $8, %xmm1
368; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
369; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
370; X64-SSE42-NEXT:    movd %xmm0, %eax
371; X64-SSE42-NEXT:    xorb $-128, %al
372; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
373; X64-SSE42-NEXT:    retq
374;
375; X64-AVX-LABEL: test_reduce_v16i8:
376; X64-AVX:       ## %bb.0:
377; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
378; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
379; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
380; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
381; X64-AVX-NEXT:    vmovd %xmm0, %eax
382; X64-AVX-NEXT:    xorb $-128, %al
383; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
384; X64-AVX-NEXT:    retq
  ; Step 1: min of elements 0..7 against elements 8..15.
385  %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
386  %2  = icmp slt <16 x i8> %a0, %1
387  %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  ; Step 2: min of elements 0..3 against elements 4..7.
388  %4  = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
389  %5  = icmp slt <16 x i8> %3, %4
390  %6  = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  ; Step 3: min of elements 0..1 against elements 2..3.
391  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
392  %8  = icmp slt <16 x i8> %6, %7
393  %9  = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  ; Step 4: min of element 0 against element 1.
394  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
395  %11 = icmp slt <16 x i8> %9, %10
396  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  ; Only lane 0 holds the reduction result.
397  %13 = extractelement <16 x i8> %12, i32 0
398  ret i8 %13
399}
400
401;
402; 256-bit Vectors
403;
404
; test_reduce_v4i64: horizontal signed minimum of a <4 x i64> (256-bit input),
; expressed as two halving steps (upper two elements against lower two, then
; element 1 against element 0), each a shufflevector + icmp slt + select, with
; the result read from lane 0. In the assertions the SSE configurations
; receive the input in xmm0/xmm1 and combine those two registers first, while
; the AVX configurations start by extracting the upper 128 bits of ymm0
; (vextractf128 / vextracti128) and emit vzeroupper before returning. The
; AVX-512 configuration is expected to use vpminsq for both steps; the others
; use a compare (pcmpgtq, or the pcmpgtd/pcmpeqd emulation on SSE2) followed by
; a blend or pand/pandn/por select.
405define i64 @test_reduce_v4i64(<4 x i64> %a0) {
406; X86-SSE2-LABEL: test_reduce_v4i64:
407; X86-SSE2:       ## %bb.0:
408; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
409; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
410; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
411; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
412; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
413; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
414; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
415; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
416; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
417; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
418; X86-SSE2-NEXT:    pand %xmm6, %xmm3
419; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
420; X86-SSE2-NEXT:    por %xmm3, %xmm4
421; X86-SSE2-NEXT:    pand %xmm4, %xmm0
422; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
423; X86-SSE2-NEXT:    por %xmm0, %xmm4
424; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
425; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
426; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
427; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
428; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
429; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
430; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
431; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
432; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
433; X86-SSE2-NEXT:    pand %xmm5, %xmm1
434; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
435; X86-SSE2-NEXT:    por %xmm1, %xmm2
436; X86-SSE2-NEXT:    pand %xmm2, %xmm4
437; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
438; X86-SSE2-NEXT:    por %xmm4, %xmm2
439; X86-SSE2-NEXT:    movd %xmm2, %eax
440; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
441; X86-SSE2-NEXT:    movd %xmm0, %edx
442; X86-SSE2-NEXT:    retl
443;
444; X86-SSE42-LABEL: test_reduce_v4i64:
445; X86-SSE42:       ## %bb.0:
446; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
447; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
448; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
449; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
450; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
451; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
452; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
453; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
454; X86-SSE42-NEXT:    movd %xmm2, %eax
455; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
456; X86-SSE42-NEXT:    retl
457;
458; X86-AVX1-LABEL: test_reduce_v4i64:
459; X86-AVX1:       ## %bb.0:
460; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
461; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
462; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
463; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
464; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
465; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
466; X86-AVX1-NEXT:    vmovd %xmm0, %eax
467; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
468; X86-AVX1-NEXT:    vzeroupper
469; X86-AVX1-NEXT:    retl
470;
471; X86-AVX2-LABEL: test_reduce_v4i64:
472; X86-AVX2:       ## %bb.0:
473; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
474; X86-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
475; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
476; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
477; X86-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
478; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
479; X86-AVX2-NEXT:    vmovd %xmm0, %eax
480; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
481; X86-AVX2-NEXT:    vzeroupper
482; X86-AVX2-NEXT:    retl
483;
484; X64-SSE2-LABEL: test_reduce_v4i64:
485; X64-SSE2:       ## %bb.0:
486; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
487; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
488; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
489; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
490; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
491; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
492; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
493; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
494; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
495; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
496; X64-SSE2-NEXT:    pand %xmm6, %xmm3
497; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
498; X64-SSE2-NEXT:    por %xmm3, %xmm4
499; X64-SSE2-NEXT:    pand %xmm4, %xmm0
500; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
501; X64-SSE2-NEXT:    por %xmm0, %xmm4
502; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
503; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
504; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
505; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
506; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
507; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
508; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
509; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
510; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
511; X64-SSE2-NEXT:    pand %xmm5, %xmm1
512; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
513; X64-SSE2-NEXT:    por %xmm1, %xmm2
514; X64-SSE2-NEXT:    pand %xmm2, %xmm4
515; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
516; X64-SSE2-NEXT:    por %xmm4, %xmm2
517; X64-SSE2-NEXT:    movq %xmm2, %rax
518; X64-SSE2-NEXT:    retq
519;
520; X64-SSE42-LABEL: test_reduce_v4i64:
521; X64-SSE42:       ## %bb.0:
522; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
523; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
524; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
525; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
526; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
527; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
528; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
529; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
530; X64-SSE42-NEXT:    movq %xmm2, %rax
531; X64-SSE42-NEXT:    retq
532;
533; X64-AVX1-LABEL: test_reduce_v4i64:
534; X64-AVX1:       ## %bb.0:
535; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
536; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
537; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
538; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
539; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
540; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
541; X64-AVX1-NEXT:    vmovq %xmm0, %rax
542; X64-AVX1-NEXT:    vzeroupper
543; X64-AVX1-NEXT:    retq
544;
545; X64-AVX2-LABEL: test_reduce_v4i64:
546; X64-AVX2:       ## %bb.0:
547; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
548; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
549; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
550; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
551; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
552; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
553; X64-AVX2-NEXT:    vmovq %xmm0, %rax
554; X64-AVX2-NEXT:    vzeroupper
555; X64-AVX2-NEXT:    retq
556;
557; X64-AVX512-LABEL: test_reduce_v4i64:
558; X64-AVX512:       ## %bb.0:
559; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
560; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
561; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
562; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
563; X64-AVX512-NEXT:    vmovq %xmm0, %rax
564; X64-AVX512-NEXT:    vzeroupper
565; X64-AVX512-NEXT:    retq
  ; Step 1: min of elements {0,1} against elements {2,3}.
566  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
567  %2 = icmp slt <4 x i64> %a0, %1
568  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  ; Step 2: min of the surviving element 0 against element 1.
569  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
570  %5 = icmp slt <4 x i64> %3, %4
571  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  ; Only lane 0 holds the reduction result.
572  %7 = extractelement <4 x i64> %6, i32 0
573  ret i64 %7
574}
575
; test_reduce_v8i32: horizontal signed minimum of an <8 x i32> (256-bit input),
; expressed as three halving steps (4, 2, then 1 element), each a
; shufflevector + icmp slt + select, with the result read from lane 0. In the
; assertions the SSE configurations receive the input in xmm0/xmm1 and combine
; those two registers first; the AVX configurations extract the upper 128 bits
; of ymm0 first and emit vzeroupper before returning. The SSE2 configurations
; are expected to emulate each min with pcmpgtd + pand/pandn/por; all other
; configurations use pminsd / vpminsd.
576define i32 @test_reduce_v8i32(<8 x i32> %a0) {
577; X86-SSE2-LABEL: test_reduce_v8i32:
578; X86-SSE2:       ## %bb.0:
579; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
580; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
581; X86-SSE2-NEXT:    pand %xmm2, %xmm0
582; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
583; X86-SSE2-NEXT:    por %xmm0, %xmm2
584; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
585; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
586; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
587; X86-SSE2-NEXT:    pand %xmm1, %xmm2
588; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
589; X86-SSE2-NEXT:    por %xmm2, %xmm1
590; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
591; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
592; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
593; X86-SSE2-NEXT:    pand %xmm2, %xmm1
594; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
595; X86-SSE2-NEXT:    por %xmm1, %xmm2
596; X86-SSE2-NEXT:    movd %xmm2, %eax
597; X86-SSE2-NEXT:    retl
598;
599; X86-SSE42-LABEL: test_reduce_v8i32:
600; X86-SSE42:       ## %bb.0:
601; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
602; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
603; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
604; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
605; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
606; X86-SSE42-NEXT:    movd %xmm0, %eax
607; X86-SSE42-NEXT:    retl
608;
609; X86-AVX1-LABEL: test_reduce_v8i32:
610; X86-AVX1:       ## %bb.0:
611; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
612; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
613; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
614; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
615; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
616; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
617; X86-AVX1-NEXT:    vmovd %xmm0, %eax
618; X86-AVX1-NEXT:    vzeroupper
619; X86-AVX1-NEXT:    retl
620;
621; X86-AVX2-LABEL: test_reduce_v8i32:
622; X86-AVX2:       ## %bb.0:
623; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
624; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
625; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
626; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
627; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
628; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
629; X86-AVX2-NEXT:    vmovd %xmm0, %eax
630; X86-AVX2-NEXT:    vzeroupper
631; X86-AVX2-NEXT:    retl
632;
633; X64-SSE2-LABEL: test_reduce_v8i32:
634; X64-SSE2:       ## %bb.0:
635; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
636; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
637; X64-SSE2-NEXT:    pand %xmm2, %xmm0
638; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
639; X64-SSE2-NEXT:    por %xmm0, %xmm2
640; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
641; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
642; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
643; X64-SSE2-NEXT:    pand %xmm1, %xmm2
644; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
645; X64-SSE2-NEXT:    por %xmm2, %xmm1
646; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
647; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
648; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
649; X64-SSE2-NEXT:    pand %xmm2, %xmm1
650; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
651; X64-SSE2-NEXT:    por %xmm1, %xmm2
652; X64-SSE2-NEXT:    movd %xmm2, %eax
653; X64-SSE2-NEXT:    retq
654;
655; X64-SSE42-LABEL: test_reduce_v8i32:
656; X64-SSE42:       ## %bb.0:
657; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
658; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
659; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
660; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
661; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
662; X64-SSE42-NEXT:    movd %xmm0, %eax
663; X64-SSE42-NEXT:    retq
664;
665; X64-AVX1-LABEL: test_reduce_v8i32:
666; X64-AVX1:       ## %bb.0:
667; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
668; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
669; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
670; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
671; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
672; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
673; X64-AVX1-NEXT:    vmovd %xmm0, %eax
674; X64-AVX1-NEXT:    vzeroupper
675; X64-AVX1-NEXT:    retq
676;
677; X64-AVX2-LABEL: test_reduce_v8i32:
678; X64-AVX2:       ## %bb.0:
679; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
680; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
681; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
682; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
683; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
684; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
685; X64-AVX2-NEXT:    vmovd %xmm0, %eax
686; X64-AVX2-NEXT:    vzeroupper
687; X64-AVX2-NEXT:    retq
688;
689; X64-AVX512-LABEL: test_reduce_v8i32:
690; X64-AVX512:       ## %bb.0:
691; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
692; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
693; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
694; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
695; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
696; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
697; X64-AVX512-NEXT:    vmovd %xmm0, %eax
698; X64-AVX512-NEXT:    vzeroupper
699; X64-AVX512-NEXT:    retq
  ; Step 1: min of elements 0..3 against elements 4..7.
700  %1  = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
701  %2  = icmp slt <8 x i32> %a0, %1
702  %3  = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  ; Step 2: min of elements 0..1 against elements 2..3.
703  %4  = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
704  %5  = icmp slt <8 x i32> %3, %4
705  %6  = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  ; Step 3: min of element 0 against element 1.
706  %7  = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
707  %8  = icmp slt <8 x i32> %6, %7
708  %9  = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  ; Only lane 0 holds the reduction result.
709  %10 = extractelement <8 x i32> %9, i32 0
710  ret i32 %10
711}
712
; Signed-minimum horizontal reduction of a <16 x i16> vector to a single i16.
; The IR performs four shufflevector + icmp slt + select steps (folding in
; 8, 4, 2 and then 1 lane) and finally extracts lane 0.
; The CHECK lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
; NOTE(review): the SSE4.2/AVX checks xor with a constant-pool value before
; phminposuw and xor the scalar with 0x8000 afterwards -- presumably a sign-bit
; flip so the unsigned instruction yields a signed minimum; confirm the constant.
713define i16 @test_reduce_v16i16(<16 x i16> %a0) {
714; X86-SSE2-LABEL: test_reduce_v16i16:
715; X86-SSE2:       ## %bb.0:
716; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
717; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
718; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
719; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
720; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
721; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
722; X86-SSE2-NEXT:    psrld $16, %xmm1
723; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
724; X86-SSE2-NEXT:    movd %xmm1, %eax
725; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
726; X86-SSE2-NEXT:    retl
727;
728; X86-SSE42-LABEL: test_reduce_v16i16:
729; X86-SSE42:       ## %bb.0:
730; X86-SSE42-NEXT:    pminsw %xmm1, %xmm0
731; X86-SSE42-NEXT:    pxor LCPI6_0, %xmm0
732; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
733; X86-SSE42-NEXT:    movd %xmm0, %eax
734; X86-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
735; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
736; X86-SSE42-NEXT:    retl
737;
738; X86-AVX1-LABEL: test_reduce_v16i16:
739; X86-AVX1:       ## %bb.0:
740; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
741; X86-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
742; X86-AVX1-NEXT:    vpxor LCPI6_0, %xmm0, %xmm0
743; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
744; X86-AVX1-NEXT:    vmovd %xmm0, %eax
745; X86-AVX1-NEXT:    xorl $32768, %eax ## imm = 0x8000
746; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
747; X86-AVX1-NEXT:    vzeroupper
748; X86-AVX1-NEXT:    retl
749;
750; X86-AVX2-LABEL: test_reduce_v16i16:
751; X86-AVX2:       ## %bb.0:
752; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
753; X86-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
754; X86-AVX2-NEXT:    vpxor LCPI6_0, %xmm0, %xmm0
755; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
756; X86-AVX2-NEXT:    vmovd %xmm0, %eax
757; X86-AVX2-NEXT:    xorl $32768, %eax ## imm = 0x8000
758; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
759; X86-AVX2-NEXT:    vzeroupper
760; X86-AVX2-NEXT:    retl
761;
762; X64-SSE2-LABEL: test_reduce_v16i16:
763; X64-SSE2:       ## %bb.0:
764; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
765; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
766; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
767; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
768; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
769; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
770; X64-SSE2-NEXT:    psrld $16, %xmm1
771; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
772; X64-SSE2-NEXT:    movd %xmm1, %eax
773; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
774; X64-SSE2-NEXT:    retq
775;
776; X64-SSE42-LABEL: test_reduce_v16i16:
777; X64-SSE42:       ## %bb.0:
778; X64-SSE42-NEXT:    pminsw %xmm1, %xmm0
779; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
780; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
781; X64-SSE42-NEXT:    movd %xmm0, %eax
782; X64-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
783; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
784; X64-SSE42-NEXT:    retq
785;
786; X64-AVX1-LABEL: test_reduce_v16i16:
787; X64-AVX1:       ## %bb.0:
788; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
789; X64-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
790; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
791; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
792; X64-AVX1-NEXT:    vmovd %xmm0, %eax
793; X64-AVX1-NEXT:    xorl $32768, %eax ## imm = 0x8000
794; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
795; X64-AVX1-NEXT:    vzeroupper
796; X64-AVX1-NEXT:    retq
797;
798; X64-AVX2-LABEL: test_reduce_v16i16:
799; X64-AVX2:       ## %bb.0:
800; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
801; X64-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
802; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
803; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
804; X64-AVX2-NEXT:    vmovd %xmm0, %eax
805; X64-AVX2-NEXT:    xorl $32768, %eax ## imm = 0x8000
806; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
807; X64-AVX2-NEXT:    vzeroupper
808; X64-AVX2-NEXT:    retq
809;
810; X64-AVX512-LABEL: test_reduce_v16i16:
811; X64-AVX512:       ## %bb.0:
812; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
813; X64-AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
814; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
815; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
816; X64-AVX512-NEXT:    vmovd %xmm0, %eax
817; X64-AVX512-NEXT:    xorl $32768, %eax ## imm = 0x8000
818; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
819; X64-AVX512-NEXT:    vzeroupper
820; X64-AVX512-NEXT:    retq
  ; Step 1: move lanes 8-15 down to lanes 0-7 and keep the signed minimum per lane.
821  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
822  %2  = icmp slt <16 x i16> %a0, %1
823  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  ; Step 2: fold lanes 4-7 into lanes 0-3.
824  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
825  %5  = icmp slt <16 x i16> %3, %4
826  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  ; Step 3: fold lanes 2-3 into lanes 0-1.
827  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
828  %8  = icmp slt <16 x i16> %6, %7
829  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  ; Step 4: fold lane 1 into lane 0.
830  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
831  %11 = icmp slt <16 x i16> %9, %10
832  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  ; Only lane 0 is returned; the remaining lanes are never read.
833  %13 = extractelement <16 x i16> %12, i32 0
834  ret i16 %13
835}
836
837define i8 @test_reduce_v32i8(<32 x i8> %a0) {
838; X86-SSE2-LABEL: test_reduce_v32i8:
839; X86-SSE2:       ## %bb.0:
840; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
841; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
842; X86-SSE2-NEXT:    pand %xmm2, %xmm0
843; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
844; X86-SSE2-NEXT:    por %xmm0, %xmm2
845; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
846; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
847; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
848; X86-SSE2-NEXT:    pand %xmm1, %xmm2
849; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
850; X86-SSE2-NEXT:    por %xmm2, %xmm1
851; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
852; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
853; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
854; X86-SSE2-NEXT:    pand %xmm2, %xmm1
855; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
856; X86-SSE2-NEXT:    por %xmm1, %xmm2
857; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
858; X86-SSE2-NEXT:    psrld $16, %xmm0
859; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
860; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
861; X86-SSE2-NEXT:    pand %xmm1, %xmm2
862; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
863; X86-SSE2-NEXT:    por %xmm2, %xmm1
864; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
865; X86-SSE2-NEXT:    psrlw $8, %xmm0
866; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
867; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
868; X86-SSE2-NEXT:    pand %xmm2, %xmm1
869; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
870; X86-SSE2-NEXT:    por %xmm1, %xmm2
871; X86-SSE2-NEXT:    movd %xmm2, %eax
872; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
873; X86-SSE2-NEXT:    retl
874;
875; X86-SSE42-LABEL: test_reduce_v32i8:
876; X86-SSE42:       ## %bb.0:
877; X86-SSE42-NEXT:    pminsb %xmm1, %xmm0
878; X86-SSE42-NEXT:    pxor LCPI7_0, %xmm0
879; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
880; X86-SSE42-NEXT:    psrlw $8, %xmm1
881; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
882; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
883; X86-SSE42-NEXT:    movd %xmm0, %eax
884; X86-SSE42-NEXT:    xorb $-128, %al
885; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
886; X86-SSE42-NEXT:    retl
887;
888; X86-AVX1-LABEL: test_reduce_v32i8:
889; X86-AVX1:       ## %bb.0:
890; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
891; X86-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
892; X86-AVX1-NEXT:    vpxor LCPI7_0, %xmm0, %xmm0
893; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
894; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
895; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
896; X86-AVX1-NEXT:    vmovd %xmm0, %eax
897; X86-AVX1-NEXT:    xorb $-128, %al
898; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
899; X86-AVX1-NEXT:    vzeroupper
900; X86-AVX1-NEXT:    retl
901;
902; X86-AVX2-LABEL: test_reduce_v32i8:
903; X86-AVX2:       ## %bb.0:
904; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
905; X86-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
906; X86-AVX2-NEXT:    vpxor LCPI7_0, %xmm0, %xmm0
907; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
908; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
909; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
910; X86-AVX2-NEXT:    vmovd %xmm0, %eax
911; X86-AVX2-NEXT:    xorb $-128, %al
912; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
913; X86-AVX2-NEXT:    vzeroupper
914; X86-AVX2-NEXT:    retl
915;
916; X64-SSE2-LABEL: test_reduce_v32i8:
917; X64-SSE2:       ## %bb.0:
918; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
919; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
920; X64-SSE2-NEXT:    pand %xmm2, %xmm0
921; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
922; X64-SSE2-NEXT:    por %xmm0, %xmm2
923; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
924; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
925; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
926; X64-SSE2-NEXT:    pand %xmm1, %xmm2
927; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
928; X64-SSE2-NEXT:    por %xmm2, %xmm1
929; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
930; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
931; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
932; X64-SSE2-NEXT:    pand %xmm2, %xmm1
933; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
934; X64-SSE2-NEXT:    por %xmm1, %xmm2
935; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
936; X64-SSE2-NEXT:    psrld $16, %xmm0
937; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
938; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
939; X64-SSE2-NEXT:    pand %xmm1, %xmm2
940; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
941; X64-SSE2-NEXT:    por %xmm2, %xmm1
942; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
943; X64-SSE2-NEXT:    psrlw $8, %xmm0
944; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
945; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
946; X64-SSE2-NEXT:    pand %xmm2, %xmm1
947; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
948; X64-SSE2-NEXT:    por %xmm1, %xmm2
949; X64-SSE2-NEXT:    movd %xmm2, %eax
950; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
951; X64-SSE2-NEXT:    retq
952;
953; X64-SSE42-LABEL: test_reduce_v32i8:
954; X64-SSE42:       ## %bb.0:
955; X64-SSE42-NEXT:    pminsb %xmm1, %xmm0
956; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
957; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
958; X64-SSE42-NEXT:    psrlw $8, %xmm1
959; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
960; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
961; X64-SSE42-NEXT:    movd %xmm0, %eax
962; X64-SSE42-NEXT:    xorb $-128, %al
963; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
964; X64-SSE42-NEXT:    retq
965;
966; X64-AVX1-LABEL: test_reduce_v32i8:
967; X64-AVX1:       ## %bb.0:
968; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
969; X64-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
970; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
971; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
972; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
973; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
974; X64-AVX1-NEXT:    vmovd %xmm0, %eax
975; X64-AVX1-NEXT:    xorb $-128, %al
976; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
977; X64-AVX1-NEXT:    vzeroupper
978; X64-AVX1-NEXT:    retq
979;
980; X64-AVX2-LABEL: test_reduce_v32i8:
981; X64-AVX2:       ## %bb.0:
982; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
983; X64-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
984; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
985; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
986; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
987; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
988; X64-AVX2-NEXT:    vmovd %xmm0, %eax
989; X64-AVX2-NEXT:    xorb $-128, %al
990; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
991; X64-AVX2-NEXT:    vzeroupper
992; X64-AVX2-NEXT:    retq
993;
994; X64-AVX512-LABEL: test_reduce_v32i8:
995; X64-AVX512:       ## %bb.0:
996; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
997; X64-AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
998; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
999; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
1000; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1001; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
1002; X64-AVX512-NEXT:    vmovd %xmm0, %eax
1003; X64-AVX512-NEXT:    xorb $-128, %al
1004; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
1005; X64-AVX512-NEXT:    vzeroupper
1006; X64-AVX512-NEXT:    retq
1007  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1008  %2  = icmp slt <32 x i8> %a0, %1
1009  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1010  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1011  %5  = icmp slt <32 x i8> %3, %4
1012  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1013  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1014  %8  = icmp slt <32 x i8> %6, %7
1015  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1016  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1017  %11 = icmp slt <32 x i8> %9, %10
1018  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1019  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1020  %14 = icmp slt <32 x i8> %12, %13
1021  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1022  %16 = extractelement <32 x i8> %15, i32 0
1023  ret i8 %16
1024}
1025
1026;
1027; 512-bit Vectors
1028;
1029
; Signed-minimum horizontal reduction of an <8 x i64> vector to a single i64.
; The IR performs three shufflevector + icmp slt + select steps (folding in
; 4, 2 and then 1 lane) and finally extracts lane 0.
; The CHECK lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
; What the checks show per configuration:
;  - SSE2: each 64-bit compare is built from pcmpgtd/pcmpeqd on values xor'ed
;    with a constant, combined through pshufd/pand/por.
;  - SSE4.2/AVX1/AVX2: pcmpgtq + blendvpd.
;  - AVX512: vpminsq directly.
;  - i686 (X86-*) runs: the result is moved into both %eax and %edx.
1030define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1031; X86-SSE2-LABEL: test_reduce_v8i64:
1032; X86-SSE2:       ## %bb.0:
1033; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1034; X86-SSE2-NEXT:    movdqa %xmm1, %xmm5
1035; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
1036; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
1037; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
1038; X86-SSE2-NEXT:    movdqa %xmm6, %xmm7
1039; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
1040; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
1041; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1042; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1043; X86-SSE2-NEXT:    pand %xmm5, %xmm6
1044; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1045; X86-SSE2-NEXT:    por %xmm6, %xmm5
1046; X86-SSE2-NEXT:    pand %xmm5, %xmm1
1047; X86-SSE2-NEXT:    pandn %xmm3, %xmm5
1048; X86-SSE2-NEXT:    por %xmm1, %xmm5
1049; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1050; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
1051; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
1052; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
1053; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
1054; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
1055; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
1056; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
1057; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1058; X86-SSE2-NEXT:    pand %xmm1, %xmm3
1059; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
1060; X86-SSE2-NEXT:    por %xmm3, %xmm1
1061; X86-SSE2-NEXT:    pand %xmm1, %xmm0
1062; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
1063; X86-SSE2-NEXT:    por %xmm0, %xmm1
1064; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
1065; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
1066; X86-SSE2-NEXT:    movdqa %xmm5, %xmm2
1067; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
1068; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
1069; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
1070; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
1071; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1072; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1073; X86-SSE2-NEXT:    pand %xmm0, %xmm2
1074; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1075; X86-SSE2-NEXT:    por %xmm2, %xmm0
1076; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1077; X86-SSE2-NEXT:    pandn %xmm5, %xmm0
1078; X86-SSE2-NEXT:    por %xmm1, %xmm0
1079; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1080; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1081; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
1082; X86-SSE2-NEXT:    pxor %xmm1, %xmm4
1083; X86-SSE2-NEXT:    movdqa %xmm4, %xmm3
1084; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
1085; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
1086; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1087; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1088; X86-SSE2-NEXT:    pand %xmm2, %xmm4
1089; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1090; X86-SSE2-NEXT:    por %xmm4, %xmm2
1091; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1092; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
1093; X86-SSE2-NEXT:    por %xmm0, %xmm2
1094; X86-SSE2-NEXT:    movd %xmm2, %eax
1095; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1096; X86-SSE2-NEXT:    movd %xmm0, %edx
1097; X86-SSE2-NEXT:    retl
1098;
1099; X86-SSE42-LABEL: test_reduce_v8i64:
1100; X86-SSE42:       ## %bb.0:
1101; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
1102; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
1103; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
1104; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1105; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
1106; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
1107; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
1108; X86-SSE42-NEXT:    movapd %xmm3, %xmm0
1109; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
1110; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
1111; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
1112; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
1113; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
1114; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
1115; X86-SSE42-NEXT:    movd %xmm1, %eax
1116; X86-SSE42-NEXT:    pextrd $1, %xmm1, %edx
1117; X86-SSE42-NEXT:    retl
1118;
1119; X86-AVX1-LABEL: test_reduce_v8i64:
1120; X86-AVX1:       ## %bb.0:
1121; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1122; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1123; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
1124; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm5
1125; X86-AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
1126; X86-AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
1127; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1128; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1129; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1130; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1131; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1132; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1133; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
1134; X86-AVX1-NEXT:    vzeroupper
1135; X86-AVX1-NEXT:    retl
1136;
1137; X86-AVX2-LABEL: test_reduce_v8i64:
1138; X86-AVX2:       ## %bb.0:
1139; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
1140; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1141; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1142; X86-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1143; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1144; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1145; X86-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1146; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1147; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1148; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
1149; X86-AVX2-NEXT:    vzeroupper
1150; X86-AVX2-NEXT:    retl
1151;
1152; X64-SSE2-LABEL: test_reduce_v8i64:
1153; X64-SSE2:       ## %bb.0:
1154; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1155; X64-SSE2-NEXT:    movdqa %xmm1, %xmm5
1156; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
1157; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
1158; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
1159; X64-SSE2-NEXT:    movdqa %xmm6, %xmm7
1160; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
1161; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1162; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
1163; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1164; X64-SSE2-NEXT:    pand %xmm8, %xmm6
1165; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1166; X64-SSE2-NEXT:    por %xmm6, %xmm5
1167; X64-SSE2-NEXT:    pand %xmm5, %xmm1
1168; X64-SSE2-NEXT:    pandn %xmm3, %xmm5
1169; X64-SSE2-NEXT:    por %xmm1, %xmm5
1170; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1171; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
1172; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
1173; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
1174; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
1175; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
1176; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1177; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
1178; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1179; X64-SSE2-NEXT:    pand %xmm7, %xmm1
1180; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1181; X64-SSE2-NEXT:    por %xmm1, %xmm3
1182; X64-SSE2-NEXT:    pand %xmm3, %xmm0
1183; X64-SSE2-NEXT:    pandn %xmm2, %xmm3
1184; X64-SSE2-NEXT:    por %xmm0, %xmm3
1185; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
1186; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
1187; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
1188; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
1189; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
1190; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
1191; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
1192; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
1193; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1194; X64-SSE2-NEXT:    pand %xmm6, %xmm0
1195; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1196; X64-SSE2-NEXT:    por %xmm0, %xmm1
1197; X64-SSE2-NEXT:    pand %xmm1, %xmm3
1198; X64-SSE2-NEXT:    pandn %xmm5, %xmm1
1199; X64-SSE2-NEXT:    por %xmm3, %xmm1
1200; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1201; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
1202; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
1203; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
1204; X64-SSE2-NEXT:    movdqa %xmm4, %xmm3
1205; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
1206; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1207; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
1208; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1209; X64-SSE2-NEXT:    pand %xmm5, %xmm2
1210; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1211; X64-SSE2-NEXT:    por %xmm2, %xmm3
1212; X64-SSE2-NEXT:    pand %xmm3, %xmm1
1213; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
1214; X64-SSE2-NEXT:    por %xmm1, %xmm3
1215; X64-SSE2-NEXT:    movq %xmm3, %rax
1216; X64-SSE2-NEXT:    retq
1217;
1218; X64-SSE42-LABEL: test_reduce_v8i64:
1219; X64-SSE42:       ## %bb.0:
1220; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
1221; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
1222; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
1223; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
1224; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
1225; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
1226; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
1227; X64-SSE42-NEXT:    movapd %xmm3, %xmm0
1228; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
1229; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
1230; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
1231; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
1232; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
1233; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
1234; X64-SSE42-NEXT:    movq %xmm1, %rax
1235; X64-SSE42-NEXT:    retq
1236;
1237; X64-AVX1-LABEL: test_reduce_v8i64:
1238; X64-AVX1:       ## %bb.0:
1239; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
1240; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
1241; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
1242; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm5
1243; X64-AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
1244; X64-AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
1245; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1246; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1247; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1248; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1249; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1250; X64-AVX1-NEXT:    vmovq %xmm0, %rax
1251; X64-AVX1-NEXT:    vzeroupper
1252; X64-AVX1-NEXT:    retq
1253;
1254; X64-AVX2-LABEL: test_reduce_v8i64:
1255; X64-AVX2:       ## %bb.0:
1256; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
1257; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1258; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
1259; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1260; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1261; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1262; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
1263; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1264; X64-AVX2-NEXT:    vmovq %xmm0, %rax
1265; X64-AVX2-NEXT:    vzeroupper
1266; X64-AVX2-NEXT:    retq
1267;
1268; X64-AVX512-LABEL: test_reduce_v8i64:
1269; X64-AVX512:       ## %bb.0:
1270; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1271; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
1272; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1273; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
1274; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1275; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
1276; X64-AVX512-NEXT:    vmovq %xmm0, %rax
1277; X64-AVX512-NEXT:    vzeroupper
1278; X64-AVX512-NEXT:    retq
  ; Step 1: move lanes 4-7 down to lanes 0-3 and keep the signed minimum per lane.
1279  %1  = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1280  %2  = icmp slt <8 x i64> %a0, %1
1281  %3  = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
  ; Step 2: fold lanes 2-3 into lanes 0-1.
1282  %4  = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1283  %5  = icmp slt <8 x i64> %3, %4
1284  %6  = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
  ; Step 3: fold lane 1 into lane 0.
1285  %7  = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1286  %8  = icmp slt <8 x i64> %6, %7
1287  %9  = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
  ; Only lane 0 is returned; the remaining lanes are never read.
1288  %10 = extractelement <8 x i64> %9, i32 0
1289  ret i64 %10
1290}
1291
1292define i32 @test_reduce_v16i32(<16 x i32> %a0) {
1293; X86-SSE2-LABEL: test_reduce_v16i32:
1294; X86-SSE2:       ## %bb.0:
1295; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
1296; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1297; X86-SSE2-NEXT:    pand %xmm4, %xmm0
1298; X86-SSE2-NEXT:    pandn %xmm2, %xmm4
1299; X86-SSE2-NEXT:    por %xmm0, %xmm4
1300; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
1301; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
1302; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1303; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
1304; X86-SSE2-NEXT:    por %xmm1, %xmm0
1305; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1306; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
1307; X86-SSE2-NEXT:    pand %xmm1, %xmm4
1308; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1309; X86-SSE2-NEXT:    por %xmm4, %xmm1
1310; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1311; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1312; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
1313; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1314; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
1315; X86-SSE2-NEXT:    por %xmm1, %xmm2
1316; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1317; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1318; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
1319; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1320; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1321; X86-SSE2-NEXT:    por %xmm2, %xmm1
1322; X86-SSE2-NEXT:    movd %xmm1, %eax
1323; X86-SSE2-NEXT:    retl
1324;
1325; X86-SSE42-LABEL: test_reduce_v16i32:
1326; X86-SSE42:       ## %bb.0:
1327; X86-SSE42-NEXT:    pminsd %xmm3, %xmm1
1328; X86-SSE42-NEXT:    pminsd %xmm2, %xmm1
1329; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
1330; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1331; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
1332; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1333; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
1334; X86-SSE42-NEXT:    movd %xmm1, %eax
1335; X86-SSE42-NEXT:    retl
1336;
1337; X86-AVX1-LABEL: test_reduce_v16i32:
1338; X86-AVX1:       ## %bb.0:
1339; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1340; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1341; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
1342; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm1, %xmm1
1343; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1344; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1345; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1346; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1347; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1348; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1349; X86-AVX1-NEXT:    vzeroupper
1350; X86-AVX1-NEXT:    retl
1351;
1352; X86-AVX2-LABEL: test_reduce_v16i32:
1353; X86-AVX2:       ## %bb.0:
1354; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
1355; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1356; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1357; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1358; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1359; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1360; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1361; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1362; X86-AVX2-NEXT:    vzeroupper
1363; X86-AVX2-NEXT:    retl
1364;
1365; X64-SSE2-LABEL: test_reduce_v16i32:
1366; X64-SSE2:       ## %bb.0:
1367; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
1368; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
1369; X64-SSE2-NEXT:    pand %xmm4, %xmm0
1370; X64-SSE2-NEXT:    pandn %xmm2, %xmm4
1371; X64-SSE2-NEXT:    por %xmm0, %xmm4
1372; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
1373; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
1374; X64-SSE2-NEXT:    pand %xmm0, %xmm1
1375; X64-SSE2-NEXT:    pandn %xmm3, %xmm0
1376; X64-SSE2-NEXT:    por %xmm1, %xmm0
1377; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1378; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
1379; X64-SSE2-NEXT:    pand %xmm1, %xmm4
1380; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
1381; X64-SSE2-NEXT:    por %xmm4, %xmm1
1382; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1383; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
1384; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
1385; X64-SSE2-NEXT:    pand %xmm2, %xmm1
1386; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
1387; X64-SSE2-NEXT:    por %xmm1, %xmm2
1388; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1389; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1390; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
1391; X64-SSE2-NEXT:    pand %xmm1, %xmm2
1392; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
1393; X64-SSE2-NEXT:    por %xmm2, %xmm1
1394; X64-SSE2-NEXT:    movd %xmm1, %eax
1395; X64-SSE2-NEXT:    retq
1396;
1397; X64-SSE42-LABEL: test_reduce_v16i32:
1398; X64-SSE42:       ## %bb.0:
1399; X64-SSE42-NEXT:    pminsd %xmm3, %xmm1
1400; X64-SSE42-NEXT:    pminsd %xmm2, %xmm1
1401; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
1402; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1403; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
1404; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1405; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
1406; X64-SSE42-NEXT:    movd %xmm1, %eax
1407; X64-SSE42-NEXT:    retq
1408;
1409; X64-AVX1-LABEL: test_reduce_v16i32:
1410; X64-AVX1:       ## %bb.0:
1411; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1412; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1413; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
1414; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm1, %xmm1
1415; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1416; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1417; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1418; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1419; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1420; X64-AVX1-NEXT:    vmovd %xmm0, %eax
1421; X64-AVX1-NEXT:    vzeroupper
1422; X64-AVX1-NEXT:    retq
1423;
1424; X64-AVX2-LABEL: test_reduce_v16i32:
1425; X64-AVX2:       ## %bb.0:
1426; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
1427; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1428; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1429; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1430; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1431; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1432; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1433; X64-AVX2-NEXT:    vmovd %xmm0, %eax
1434; X64-AVX2-NEXT:    vzeroupper
1435; X64-AVX2-NEXT:    retq
1436;
1437; X64-AVX512-LABEL: test_reduce_v16i32:
1438; X64-AVX512:       ## %bb.0:
1439; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1440; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
1441; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1442; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1443; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1444; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1445; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1446; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
1447; X64-AVX512-NEXT:    vmovd %xmm0, %eax
1448; X64-AVX512-NEXT:    vzeroupper
1449; X64-AVX512-NEXT:    retq
1450  %1  = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1451  %2  = icmp slt <16 x i32> %a0, %1
1452  %3  = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
1453  %4  = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1454  %5  = icmp slt <16 x i32> %3, %4
1455  %6  = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
1456  %7  = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1457  %8  = icmp slt <16 x i32> %6, %7
1458  %9  = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
1459  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1460  %11 = icmp slt <16 x i32> %9, %10
1461  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
1462  %13 = extractelement <16 x i32> %12, i32 0
1463  ret i32 %13
1464}
1465
1466define i16 @test_reduce_v32i16(<32 x i16> %a0) {
1467; X86-SSE2-LABEL: test_reduce_v32i16:
1468; X86-SSE2:       ## %bb.0:
1469; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
1470; X86-SSE2-NEXT:    pminsw %xmm2, %xmm1
1471; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
1472; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1473; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
1474; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1475; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
1476; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
1477; X86-SSE2-NEXT:    psrld $16, %xmm0
1478; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
1479; X86-SSE2-NEXT:    movd %xmm0, %eax
1480; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
1481; X86-SSE2-NEXT:    retl
1482;
1483; X86-SSE42-LABEL: test_reduce_v32i16:
1484; X86-SSE42:       ## %bb.0:
1485; X86-SSE42-NEXT:    pminsw %xmm3, %xmm1
1486; X86-SSE42-NEXT:    pminsw %xmm2, %xmm1
1487; X86-SSE42-NEXT:    pminsw %xmm0, %xmm1
1488; X86-SSE42-NEXT:    pxor LCPI10_0, %xmm1
1489; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
1490; X86-SSE42-NEXT:    movd %xmm0, %eax
1491; X86-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
1492; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
1493; X86-SSE42-NEXT:    retl
1494;
1495; X86-AVX1-LABEL: test_reduce_v32i16:
1496; X86-AVX1:       ## %bb.0:
1497; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1498; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1499; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
1500; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm1, %xmm1
1501; X86-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
1502; X86-AVX1-NEXT:    vpxor LCPI10_0, %xmm0, %xmm0
1503; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
1504; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1505; X86-AVX1-NEXT:    xorl $32768, %eax ## imm = 0x8000
1506; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
1507; X86-AVX1-NEXT:    vzeroupper
1508; X86-AVX1-NEXT:    retl
1509;
1510; X86-AVX2-LABEL: test_reduce_v32i16:
1511; X86-AVX2:       ## %bb.0:
1512; X86-AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
1513; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1514; X86-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
1515; X86-AVX2-NEXT:    vpxor LCPI10_0, %xmm0, %xmm0
1516; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
1517; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1518; X86-AVX2-NEXT:    xorl $32768, %eax ## imm = 0x8000
1519; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
1520; X86-AVX2-NEXT:    vzeroupper
1521; X86-AVX2-NEXT:    retl
1522;
1523; X64-SSE2-LABEL: test_reduce_v32i16:
1524; X64-SSE2:       ## %bb.0:
1525; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
1526; X64-SSE2-NEXT:    pminsw %xmm2, %xmm1
1527; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
1528; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1529; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
1530; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1531; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
1532; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
1533; X64-SSE2-NEXT:    psrld $16, %xmm0
1534; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
1535; X64-SSE2-NEXT:    movd %xmm0, %eax
1536; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
1537; X64-SSE2-NEXT:    retq
1538;
1539; X64-SSE42-LABEL: test_reduce_v32i16:
1540; X64-SSE42:       ## %bb.0:
1541; X64-SSE42-NEXT:    pminsw %xmm3, %xmm1
1542; X64-SSE42-NEXT:    pminsw %xmm2, %xmm1
1543; X64-SSE42-NEXT:    pminsw %xmm0, %xmm1
1544; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
1545; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
1546; X64-SSE42-NEXT:    movd %xmm0, %eax
1547; X64-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
1548; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
1549; X64-SSE42-NEXT:    retq
1550;
1551; X64-AVX1-LABEL: test_reduce_v32i16:
1552; X64-AVX1:       ## %bb.0:
1553; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1554; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1555; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
1556; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm1, %xmm1
1557; X64-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
1558; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1559; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
1560; X64-AVX1-NEXT:    vmovd %xmm0, %eax
1561; X64-AVX1-NEXT:    xorl $32768, %eax ## imm = 0x8000
1562; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
1563; X64-AVX1-NEXT:    vzeroupper
1564; X64-AVX1-NEXT:    retq
1565;
1566; X64-AVX2-LABEL: test_reduce_v32i16:
1567; X64-AVX2:       ## %bb.0:
1568; X64-AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
1569; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1570; X64-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
1571; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1572; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
1573; X64-AVX2-NEXT:    vmovd %xmm0, %eax
1574; X64-AVX2-NEXT:    xorl $32768, %eax ## imm = 0x8000
1575; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
1576; X64-AVX2-NEXT:    vzeroupper
1577; X64-AVX2-NEXT:    retq
1578;
1579; X64-AVX512-LABEL: test_reduce_v32i16:
1580; X64-AVX512:       ## %bb.0:
1581; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1582; X64-AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
1583; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1584; X64-AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
1585; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1586; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
1587; X64-AVX512-NEXT:    vmovd %xmm0, %eax
1588; X64-AVX512-NEXT:    xorl $32768, %eax ## imm = 0x8000
1589; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
1590; X64-AVX512-NEXT:    vzeroupper
1591; X64-AVX512-NEXT:    retq
1592  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1593  %2  = icmp slt <32 x i16> %a0, %1
1594  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1595  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1596  %5  = icmp slt <32 x i16> %3, %4
1597  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1598  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1599  %8  = icmp slt <32 x i16> %6, %7
1600  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1601  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1602  %11 = icmp slt <32 x i16> %9, %10
1603  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
1604  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1605  %14 = icmp slt <32 x i16> %12, %13
1606  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
1607  %16 = extractelement <32 x i16> %15, i32 0
1608  ret i16 %16
1609}
1610
1611define i8 @test_reduce_v64i8(<64 x i8> %a0) {
1612; X86-SSE2-LABEL: test_reduce_v64i8:
1613; X86-SSE2:       ## %bb.0:
1614; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
1615; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
1616; X86-SSE2-NEXT:    pand %xmm4, %xmm0
1617; X86-SSE2-NEXT:    pandn %xmm2, %xmm4
1618; X86-SSE2-NEXT:    por %xmm0, %xmm4
1619; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
1620; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
1621; X86-SSE2-NEXT:    pand %xmm0, %xmm1
1622; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
1623; X86-SSE2-NEXT:    por %xmm1, %xmm0
1624; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1625; X86-SSE2-NEXT:    pcmpgtb %xmm4, %xmm1
1626; X86-SSE2-NEXT:    pand %xmm1, %xmm4
1627; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1628; X86-SSE2-NEXT:    por %xmm4, %xmm1
1629; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1630; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1631; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
1632; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1633; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
1634; X86-SSE2-NEXT:    por %xmm1, %xmm2
1635; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1636; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1637; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
1638; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1639; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1640; X86-SSE2-NEXT:    por %xmm2, %xmm1
1641; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
1642; X86-SSE2-NEXT:    psrld $16, %xmm0
1643; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1644; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
1645; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1646; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
1647; X86-SSE2-NEXT:    por %xmm1, %xmm2
1648; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
1649; X86-SSE2-NEXT:    psrlw $8, %xmm0
1650; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1651; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
1652; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1653; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
1654; X86-SSE2-NEXT:    por %xmm2, %xmm1
1655; X86-SSE2-NEXT:    movd %xmm1, %eax
1656; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
1657; X86-SSE2-NEXT:    retl
1658;
1659; X86-SSE42-LABEL: test_reduce_v64i8:
1660; X86-SSE42:       ## %bb.0:
1661; X86-SSE42-NEXT:    pminsb %xmm3, %xmm1
1662; X86-SSE42-NEXT:    pminsb %xmm2, %xmm1
1663; X86-SSE42-NEXT:    pminsb %xmm0, %xmm1
1664; X86-SSE42-NEXT:    pxor LCPI11_0, %xmm1
1665; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
1666; X86-SSE42-NEXT:    psrlw $8, %xmm0
1667; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
1668; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
1669; X86-SSE42-NEXT:    movd %xmm0, %eax
1670; X86-SSE42-NEXT:    xorb $-128, %al
1671; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
1672; X86-SSE42-NEXT:    retl
1673;
1674; X86-AVX1-LABEL: test_reduce_v64i8:
1675; X86-AVX1:       ## %bb.0:
1676; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1677; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1678; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
1679; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm1, %xmm1
1680; X86-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1681; X86-AVX1-NEXT:    vpxor LCPI11_0, %xmm0, %xmm0
1682; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
1683; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1684; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
1685; X86-AVX1-NEXT:    vmovd %xmm0, %eax
1686; X86-AVX1-NEXT:    xorb $-128, %al
1687; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
1688; X86-AVX1-NEXT:    vzeroupper
1689; X86-AVX1-NEXT:    retl
1690;
1691; X86-AVX2-LABEL: test_reduce_v64i8:
1692; X86-AVX2:       ## %bb.0:
1693; X86-AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
1694; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1695; X86-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1696; X86-AVX2-NEXT:    vpxor LCPI11_0, %xmm0, %xmm0
1697; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
1698; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1699; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
1700; X86-AVX2-NEXT:    vmovd %xmm0, %eax
1701; X86-AVX2-NEXT:    xorb $-128, %al
1702; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
1703; X86-AVX2-NEXT:    vzeroupper
1704; X86-AVX2-NEXT:    retl
1705;
1706; X64-SSE2-LABEL: test_reduce_v64i8:
1707; X64-SSE2:       ## %bb.0:
1708; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
1709; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
1710; X64-SSE2-NEXT:    pand %xmm4, %xmm0
1711; X64-SSE2-NEXT:    pandn %xmm2, %xmm4
1712; X64-SSE2-NEXT:    por %xmm0, %xmm4
1713; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
1714; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
1715; X64-SSE2-NEXT:    pand %xmm0, %xmm1
1716; X64-SSE2-NEXT:    pandn %xmm3, %xmm0
1717; X64-SSE2-NEXT:    por %xmm1, %xmm0
1718; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1719; X64-SSE2-NEXT:    pcmpgtb %xmm4, %xmm1
1720; X64-SSE2-NEXT:    pand %xmm1, %xmm4
1721; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
1722; X64-SSE2-NEXT:    por %xmm4, %xmm1
1723; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1724; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
1725; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
1726; X64-SSE2-NEXT:    pand %xmm2, %xmm1
1727; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
1728; X64-SSE2-NEXT:    por %xmm1, %xmm2
1729; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1730; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1731; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
1732; X64-SSE2-NEXT:    pand %xmm1, %xmm2
1733; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
1734; X64-SSE2-NEXT:    por %xmm2, %xmm1
1735; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
1736; X64-SSE2-NEXT:    psrld $16, %xmm0
1737; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
1738; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
1739; X64-SSE2-NEXT:    pand %xmm2, %xmm1
1740; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
1741; X64-SSE2-NEXT:    por %xmm1, %xmm2
1742; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
1743; X64-SSE2-NEXT:    psrlw $8, %xmm0
1744; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1745; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
1746; X64-SSE2-NEXT:    pand %xmm1, %xmm2
1747; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
1748; X64-SSE2-NEXT:    por %xmm2, %xmm1
1749; X64-SSE2-NEXT:    movd %xmm1, %eax
1750; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
1751; X64-SSE2-NEXT:    retq
1752;
1753; X64-SSE42-LABEL: test_reduce_v64i8:
1754; X64-SSE42:       ## %bb.0:
1755; X64-SSE42-NEXT:    pminsb %xmm3, %xmm1
1756; X64-SSE42-NEXT:    pminsb %xmm2, %xmm1
1757; X64-SSE42-NEXT:    pminsb %xmm0, %xmm1
1758; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
1759; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
1760; X64-SSE42-NEXT:    psrlw $8, %xmm0
1761; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
1762; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
1763; X64-SSE42-NEXT:    movd %xmm0, %eax
1764; X64-SSE42-NEXT:    xorb $-128, %al
1765; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
1766; X64-SSE42-NEXT:    retq
1767;
1768; X64-AVX1-LABEL: test_reduce_v64i8:
1769; X64-AVX1:       ## %bb.0:
1770; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1771; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1772; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
1773; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm1, %xmm1
1774; X64-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1775; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1776; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
1777; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1778; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
1779; X64-AVX1-NEXT:    vmovd %xmm0, %eax
1780; X64-AVX1-NEXT:    xorb $-128, %al
1781; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
1782; X64-AVX1-NEXT:    vzeroupper
1783; X64-AVX1-NEXT:    retq
1784;
1785; X64-AVX2-LABEL: test_reduce_v64i8:
1786; X64-AVX2:       ## %bb.0:
1787; X64-AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
1788; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1789; X64-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1790; X64-AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1791; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
1792; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1793; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
1794; X64-AVX2-NEXT:    vmovd %xmm0, %eax
1795; X64-AVX2-NEXT:    xorb $-128, %al
1796; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
1797; X64-AVX2-NEXT:    vzeroupper
1798; X64-AVX2-NEXT:    retq
1799;
1800; X64-AVX512-LABEL: test_reduce_v64i8:
1801; X64-AVX512:       ## %bb.0:
1802; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
1803; X64-AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
1804; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
1805; X64-AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
1806; X64-AVX512-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1807; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
1808; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
1809; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
1810; X64-AVX512-NEXT:    vmovd %xmm0, %eax
1811; X64-AVX512-NEXT:    xorb $-128, %al
1812; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
1813; X64-AVX512-NEXT:    vzeroupper
1814; X64-AVX512-NEXT:    retq
1815  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1816  %2  = icmp slt <64 x i8> %a0, %1
1817  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
1818  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1819  %5  = icmp slt <64 x i8> %3, %4
1820  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
1821  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1822  %8  = icmp slt <64 x i8> %6, %7
1823  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
1824  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1825  %11 = icmp slt <64 x i8> %9, %10
1826  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
1827  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1828  %14 = icmp slt <64 x i8> %12, %13
1829  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
1830  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1831  %17 = icmp slt <64 x i8> %15, %16
1832  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
1833  %19 = extractelement <64 x i8> %18, i32 0
1834  ret i8 %19
1835}
1836
;
; Partial Vector Reductions
;
1840
; Partial signed-minimum reduction: the argument is <16 x i16>, but the first
; shuffle mask starts at lanes 4-7, so only lanes 0-7 feed the value that ends
; up in lane 0. Three shuffle + icmp slt + select steps (4, 2, 1 lanes) are
; followed by an extract of lane 0.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor LCPI12_0, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor LCPI12_0, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  ; Step 1: bring lanes 4-7 down to lanes 0-3 and take the signed minimum.
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  ; Step 2: bring lanes 2-3 down to lanes 0-1.
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  ; Step 3: bring lane 1 down to lane 0.
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  ; Lane 0 now holds the reduced value.
  %10 = extractelement <16 x i16> %9, i32 0
  ret i16 %10
}
1917
1918define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
1919; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
1920; X86-SSE2:       ## %bb.0:
1921; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1922; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
1923; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1924; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
1925; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1926; X86-SSE2-NEXT:    psrld $16, %xmm1
1927; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
1928; X86-SSE2-NEXT:    movd %xmm1, %eax
1929; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
1930; X86-SSE2-NEXT:    retl
1931;
1932; X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
1933; X86-SSE42:       ## %bb.0:
1934; X86-SSE42-NEXT:    pxor LCPI13_0, %xmm0
1935; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
1936; X86-SSE42-NEXT:    movd %xmm0, %eax
1937; X86-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
1938; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
1939; X86-SSE42-NEXT:    retl
1940;
1941; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
1942; X86-AVX:       ## %bb.0:
1943; X86-AVX-NEXT:    vpxor LCPI13_0, %xmm0, %xmm0
1944; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
1945; X86-AVX-NEXT:    vmovd %xmm0, %eax
1946; X86-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
1947; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
1948; X86-AVX-NEXT:    vzeroupper
1949; X86-AVX-NEXT:    retl
1950;
1951; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
1952; X64-SSE2:       ## %bb.0:
1953; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1954; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
1955; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1956; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
1957; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
1958; X64-SSE2-NEXT:    psrld $16, %xmm1
1959; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
1960; X64-SSE2-NEXT:    movd %xmm1, %eax
1961; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
1962; X64-SSE2-NEXT:    retq
1963;
1964; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
1965; X64-SSE42:       ## %bb.0:
1966; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
1967; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
1968; X64-SSE42-NEXT:    movd %xmm0, %eax
1969; X64-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
1970; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
1971; X64-SSE42-NEXT:    retq
1972;
1973; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
1974; X64-AVX:       ## %bb.0:
1975; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
1976; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
1977; X64-AVX-NEXT:    vmovd %xmm0, %eax
1978; X64-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
1979; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
1980; X64-AVX-NEXT:    vzeroupper
1981; X64-AVX-NEXT:    retq
1982  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1983  %2  = icmp slt <32 x i16> %a0, %1
1984  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1985  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1986  %5  = icmp slt <32 x i16> %3, %4
1987  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1988  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1989  %8  = icmp slt <32 x i16> %6, %7
1990  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1991  %10 = extractelement <32 x i16> %9, i32 0
1992  ret i16 %10
1993}
1994
1995define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
1996; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
1997; X86-SSE2:       ## %bb.0:
1998; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1999; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
2000; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
2001; X86-SSE2-NEXT:    pand %xmm2, %xmm0
2002; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
2003; X86-SSE2-NEXT:    por %xmm0, %xmm2
2004; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
2005; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2006; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2007; X86-SSE2-NEXT:    pand %xmm1, %xmm2
2008; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
2009; X86-SSE2-NEXT:    por %xmm2, %xmm1
2010; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
2011; X86-SSE2-NEXT:    psrld $16, %xmm0
2012; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
2013; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
2014; X86-SSE2-NEXT:    pand %xmm2, %xmm1
2015; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
2016; X86-SSE2-NEXT:    por %xmm1, %xmm2
2017; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
2018; X86-SSE2-NEXT:    psrlw $8, %xmm0
2019; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2020; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2021; X86-SSE2-NEXT:    pand %xmm1, %xmm2
2022; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
2023; X86-SSE2-NEXT:    por %xmm2, %xmm1
2024; X86-SSE2-NEXT:    movd %xmm1, %eax
2025; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
2026; X86-SSE2-NEXT:    retl
2027;
2028; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
2029; X86-SSE42:       ## %bb.0:
2030; X86-SSE42-NEXT:    pxor LCPI14_0, %xmm0
2031; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
2032; X86-SSE42-NEXT:    psrlw $8, %xmm1
2033; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
2034; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
2035; X86-SSE42-NEXT:    movd %xmm0, %eax
2036; X86-SSE42-NEXT:    xorb $-128, %al
2037; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
2038; X86-SSE42-NEXT:    retl
2039;
2040; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
2041; X86-AVX:       ## %bb.0:
2042; X86-AVX-NEXT:    vpxor LCPI14_0, %xmm0, %xmm0
2043; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
2044; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
2045; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
2046; X86-AVX-NEXT:    vmovd %xmm0, %eax
2047; X86-AVX-NEXT:    xorb $-128, %al
2048; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
2049; X86-AVX-NEXT:    vzeroupper
2050; X86-AVX-NEXT:    retl
2051;
2052; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
2053; X64-SSE2:       ## %bb.0:
2054; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2055; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
2056; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
2057; X64-SSE2-NEXT:    pand %xmm2, %xmm0
2058; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
2059; X64-SSE2-NEXT:    por %xmm0, %xmm2
2060; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
2061; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
2062; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2063; X64-SSE2-NEXT:    pand %xmm1, %xmm2
2064; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
2065; X64-SSE2-NEXT:    por %xmm2, %xmm1
2066; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
2067; X64-SSE2-NEXT:    psrld $16, %xmm0
2068; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
2069; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
2070; X64-SSE2-NEXT:    pand %xmm2, %xmm1
2071; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
2072; X64-SSE2-NEXT:    por %xmm1, %xmm2
2073; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
2074; X64-SSE2-NEXT:    psrlw $8, %xmm0
2075; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
2076; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2077; X64-SSE2-NEXT:    pand %xmm1, %xmm2
2078; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
2079; X64-SSE2-NEXT:    por %xmm2, %xmm1
2080; X64-SSE2-NEXT:    movd %xmm1, %eax
2081; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
2082; X64-SSE2-NEXT:    retq
2083;
2084; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
2085; X64-SSE42:       ## %bb.0:
2086; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
2087; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
2088; X64-SSE42-NEXT:    psrlw $8, %xmm1
2089; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
2090; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
2091; X64-SSE42-NEXT:    movd %xmm0, %eax
2092; X64-SSE42-NEXT:    xorb $-128, %al
2093; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
2094; X64-SSE42-NEXT:    retq
2095;
2096; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
2097; X64-AVX:       ## %bb.0:
2098; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
2099; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
2100; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
2101; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
2102; X64-AVX-NEXT:    vmovd %xmm0, %eax
2103; X64-AVX-NEXT:    xorb $-128, %al
2104; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
2105; X64-AVX-NEXT:    vzeroupper
2106; X64-AVX-NEXT:    retq
2107  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2108  %2  = icmp slt <32 x i8> %a0, %1
2109  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
2110  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2111  %5  = icmp slt <32 x i8> %3, %4
2112  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
2113  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2114  %8  = icmp slt <32 x i8> %6, %7
2115  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
2116  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2117  %11 = icmp slt <32 x i8> %9, %10
2118  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
2119  %13 = extractelement <32 x i8> %12, i32 0
2120  ret i8 %13
2121}
2122
2123define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
2124; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
2125; X86-SSE2:       ## %bb.0:
2126; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2127; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
2128; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
2129; X86-SSE2-NEXT:    pand %xmm2, %xmm0
2130; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
2131; X86-SSE2-NEXT:    por %xmm0, %xmm2
2132; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
2133; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2134; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2135; X86-SSE2-NEXT:    pand %xmm1, %xmm2
2136; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
2137; X86-SSE2-NEXT:    por %xmm2, %xmm1
2138; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
2139; X86-SSE2-NEXT:    psrld $16, %xmm0
2140; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
2141; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
2142; X86-SSE2-NEXT:    pand %xmm2, %xmm1
2143; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
2144; X86-SSE2-NEXT:    por %xmm1, %xmm2
2145; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
2146; X86-SSE2-NEXT:    psrlw $8, %xmm0
2147; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2148; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2149; X86-SSE2-NEXT:    pand %xmm1, %xmm2
2150; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
2151; X86-SSE2-NEXT:    por %xmm2, %xmm1
2152; X86-SSE2-NEXT:    movd %xmm1, %eax
2153; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
2154; X86-SSE2-NEXT:    retl
2155;
2156; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
2157; X86-SSE42:       ## %bb.0:
2158; X86-SSE42-NEXT:    pxor LCPI15_0, %xmm0
2159; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
2160; X86-SSE42-NEXT:    psrlw $8, %xmm1
2161; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
2162; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
2163; X86-SSE42-NEXT:    movd %xmm0, %eax
2164; X86-SSE42-NEXT:    xorb $-128, %al
2165; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
2166; X86-SSE42-NEXT:    retl
2167;
2168; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
2169; X86-AVX:       ## %bb.0:
2170; X86-AVX-NEXT:    vpxor LCPI15_0, %xmm0, %xmm0
2171; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
2172; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
2173; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
2174; X86-AVX-NEXT:    vmovd %xmm0, %eax
2175; X86-AVX-NEXT:    xorb $-128, %al
2176; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
2177; X86-AVX-NEXT:    vzeroupper
2178; X86-AVX-NEXT:    retl
2179;
2180; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
2181; X64-SSE2:       ## %bb.0:
2182; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2183; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
2184; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
2185; X64-SSE2-NEXT:    pand %xmm2, %xmm0
2186; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
2187; X64-SSE2-NEXT:    por %xmm0, %xmm2
2188; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
2189; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
2190; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2191; X64-SSE2-NEXT:    pand %xmm1, %xmm2
2192; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
2193; X64-SSE2-NEXT:    por %xmm2, %xmm1
2194; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
2195; X64-SSE2-NEXT:    psrld $16, %xmm0
2196; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
2197; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
2198; X64-SSE2-NEXT:    pand %xmm2, %xmm1
2199; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
2200; X64-SSE2-NEXT:    por %xmm1, %xmm2
2201; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
2202; X64-SSE2-NEXT:    psrlw $8, %xmm0
2203; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
2204; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
2205; X64-SSE2-NEXT:    pand %xmm1, %xmm2
2206; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
2207; X64-SSE2-NEXT:    por %xmm2, %xmm1
2208; X64-SSE2-NEXT:    movd %xmm1, %eax
2209; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
2210; X64-SSE2-NEXT:    retq
2211;
2212; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
2213; X64-SSE42:       ## %bb.0:
2214; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
2215; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
2216; X64-SSE42-NEXT:    psrlw $8, %xmm1
2217; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
2218; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
2219; X64-SSE42-NEXT:    movd %xmm0, %eax
2220; X64-SSE42-NEXT:    xorb $-128, %al
2221; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
2222; X64-SSE42-NEXT:    retq
2223;
2224; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
2225; X64-AVX:       ## %bb.0:
2226; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
2227; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
2228; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
2229; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
2230; X64-AVX-NEXT:    vmovd %xmm0, %eax
2231; X64-AVX-NEXT:    xorb $-128, %al
2232; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
2233; X64-AVX-NEXT:    vzeroupper
2234; X64-AVX-NEXT:    retq
2235  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2236  %2  = icmp slt <64 x i8> %a0, %1
2237  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
2238  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2239  %5  = icmp slt <64 x i8> %3, %4
2240  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
2241  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2242  %8  = icmp slt <64 x i8> %6, %7
2243  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
2244  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2245  %11 = icmp slt <64 x i8> %9, %10
2246  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
2247  %13 = extractelement <64 x i8> %12, i32 0
2248  ret i8 %13
2249}
2250