1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2 3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42 4; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1 5; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2 6; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2 7; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42 8; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1 9; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2 10; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512 11 12; 13; 128-bit Vectors 14; 15 16define i64 @test_reduce_v2i64(<2 x i64> %a0) { 17; X86-SSE2-LABEL: test_reduce_v2i64: 18; X86-SSE2: ## %bb.0: 19; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 20; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 21; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 22; X86-SSE2-NEXT: pxor %xmm2, %xmm3 23; X86-SSE2-NEXT: pxor %xmm1, %xmm2 24; X86-SSE2-NEXT: movdqa %xmm3, %xmm4 25; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 26; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 27; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 28; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 29; X86-SSE2-NEXT: pand %xmm5, %xmm2 30; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 31; X86-SSE2-NEXT: por %xmm2, %xmm3 32; X86-SSE2-NEXT: pand %xmm3, %xmm0 33; X86-SSE2-NEXT: pandn %xmm1, %xmm3 34; X86-SSE2-NEXT: por %xmm0, %xmm3 35; X86-SSE2-NEXT: movd %xmm3, %eax 36; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1] 37; 
X86-SSE2-NEXT: movd %xmm0, %edx 38; X86-SSE2-NEXT: retl 39; 40; X86-SSE42-LABEL: test_reduce_v2i64: 41; X86-SSE42: ## %bb.0: 42; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 43; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 44; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 45; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 46; X86-SSE42-NEXT: movd %xmm2, %eax 47; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 48; X86-SSE42-NEXT: retl 49; 50; X86-AVX-LABEL: test_reduce_v2i64: 51; X86-AVX: ## %bb.0: 52; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 53; X86-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 54; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 55; X86-AVX-NEXT: vmovd %xmm0, %eax 56; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx 57; X86-AVX-NEXT: retl 58; 59; X64-SSE2-LABEL: test_reduce_v2i64: 60; X64-SSE2: ## %bb.0: 61; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 62; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 63; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 64; X64-SSE2-NEXT: pxor %xmm2, %xmm3 65; X64-SSE2-NEXT: pxor %xmm1, %xmm2 66; X64-SSE2-NEXT: movdqa %xmm3, %xmm4 67; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 68; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 69; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 70; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 71; X64-SSE2-NEXT: pand %xmm5, %xmm2 72; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 73; X64-SSE2-NEXT: por %xmm2, %xmm3 74; X64-SSE2-NEXT: pand %xmm3, %xmm0 75; X64-SSE2-NEXT: pandn %xmm1, %xmm3 76; X64-SSE2-NEXT: por %xmm0, %xmm3 77; X64-SSE2-NEXT: movq %xmm3, %rax 78; X64-SSE2-NEXT: retq 79; 80; X64-SSE42-LABEL: test_reduce_v2i64: 81; X64-SSE42: ## %bb.0: 82; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 83; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 84; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 85; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 86; X64-SSE42-NEXT: movq %xmm2, %rax 87; X64-SSE42-NEXT: retq 88; 89; X64-AVX1-LABEL: test_reduce_v2i64: 90; X64-AVX1: ## %bb.0: 91; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 
= xmm0[2,3,2,3] 92; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 93; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 94; X64-AVX1-NEXT: vmovq %xmm0, %rax 95; X64-AVX1-NEXT: retq 96; 97; X64-AVX2-LABEL: test_reduce_v2i64: 98; X64-AVX2: ## %bb.0: 99; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 100; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 101; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 102; X64-AVX2-NEXT: vmovq %xmm0, %rax 103; X64-AVX2-NEXT: retq 104; 105; X64-AVX512-LABEL: test_reduce_v2i64: 106; X64-AVX512: ## %bb.0: 107; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 108; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 109; X64-AVX512-NEXT: vmovq %xmm0, %rax 110; X64-AVX512-NEXT: retq 111 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 112 %2 = icmp sgt <2 x i64> %a0, %1 113 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1 114 %4 = extractelement <2 x i64> %3, i32 0 115 ret i64 %4 116} 117 118define i32 @test_reduce_v4i32(<4 x i32> %a0) { 119; X86-SSE2-LABEL: test_reduce_v4i32: 120; X86-SSE2: ## %bb.0: 121; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 122; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 123; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 124; X86-SSE2-NEXT: pand %xmm2, %xmm0 125; X86-SSE2-NEXT: pandn %xmm1, %xmm2 126; X86-SSE2-NEXT: por %xmm0, %xmm2 127; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 128; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 129; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 130; X86-SSE2-NEXT: pand %xmm1, %xmm2 131; X86-SSE2-NEXT: pandn %xmm0, %xmm1 132; X86-SSE2-NEXT: por %xmm2, %xmm1 133; X86-SSE2-NEXT: movd %xmm1, %eax 134; X86-SSE2-NEXT: retl 135; 136; X86-SSE42-LABEL: test_reduce_v4i32: 137; X86-SSE42: ## %bb.0: 138; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 139; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 140; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 141; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 142; X86-SSE42-NEXT: movd %xmm0, %eax 143; X86-SSE42-NEXT: retl 144; 145; 
X86-AVX-LABEL: test_reduce_v4i32: 146; X86-AVX: ## %bb.0: 147; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 148; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 149; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 150; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 151; X86-AVX-NEXT: vmovd %xmm0, %eax 152; X86-AVX-NEXT: retl 153; 154; X64-SSE2-LABEL: test_reduce_v4i32: 155; X64-SSE2: ## %bb.0: 156; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 157; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 158; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 159; X64-SSE2-NEXT: pand %xmm2, %xmm0 160; X64-SSE2-NEXT: pandn %xmm1, %xmm2 161; X64-SSE2-NEXT: por %xmm0, %xmm2 162; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 163; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 164; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 165; X64-SSE2-NEXT: pand %xmm1, %xmm2 166; X64-SSE2-NEXT: pandn %xmm0, %xmm1 167; X64-SSE2-NEXT: por %xmm2, %xmm1 168; X64-SSE2-NEXT: movd %xmm1, %eax 169; X64-SSE2-NEXT: retq 170; 171; X64-SSE42-LABEL: test_reduce_v4i32: 172; X64-SSE42: ## %bb.0: 173; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 174; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 175; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 176; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 177; X64-SSE42-NEXT: movd %xmm0, %eax 178; X64-SSE42-NEXT: retq 179; 180; X64-AVX-LABEL: test_reduce_v4i32: 181; X64-AVX: ## %bb.0: 182; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 183; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 184; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 185; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 186; X64-AVX-NEXT: vmovd %xmm0, %eax 187; X64-AVX-NEXT: retq 188 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 189 %2 = icmp sgt <4 x i32> %a0, %1 190 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1 191 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 192 %5 = icmp sgt <4 x i32> %3, %4 193 %6 = select <4 x i1> %5, 
<4 x i32> %3, <4 x i32> %4 194 %7 = extractelement <4 x i32> %6, i32 0 195 ret i32 %7 196} 197 198define i16 @test_reduce_v8i16(<8 x i16> %a0) { 199; X86-SSE2-LABEL: test_reduce_v8i16: 200; X86-SSE2: ## %bb.0: 201; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 202; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 203; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 204; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 205; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 206; X86-SSE2-NEXT: psrld $16, %xmm1 207; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 208; X86-SSE2-NEXT: movd %xmm1, %eax 209; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 210; X86-SSE2-NEXT: retl 211; 212; X86-SSE42-LABEL: test_reduce_v8i16: 213; X86-SSE42: ## %bb.0: 214; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0 215; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 216; X86-SSE42-NEXT: movd %xmm0, %eax 217; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 218; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 219; X86-SSE42-NEXT: retl 220; 221; X86-AVX-LABEL: test_reduce_v8i16: 222; X86-AVX: ## %bb.0: 223; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0 224; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 225; X86-AVX-NEXT: vmovd %xmm0, %eax 226; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 227; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 228; X86-AVX-NEXT: retl 229; 230; X64-SSE2-LABEL: test_reduce_v8i16: 231; X64-SSE2: ## %bb.0: 232; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 233; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 234; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 235; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 236; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 237; X64-SSE2-NEXT: psrld $16, %xmm1 238; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 239; X64-SSE2-NEXT: movd %xmm1, %eax 240; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 241; X64-SSE2-NEXT: retq 242; 243; X64-SSE42-LABEL: test_reduce_v8i16: 244; X64-SSE42: ## %bb.0: 245; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 246; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 247; X64-SSE42-NEXT: 
movd %xmm0, %eax 248; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 249; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 250; X64-SSE42-NEXT: retq 251; 252; X64-AVX-LABEL: test_reduce_v8i16: 253; X64-AVX: ## %bb.0: 254; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 255; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 256; X64-AVX-NEXT: vmovd %xmm0, %eax 257; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 258; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 259; X64-AVX-NEXT: retq 260 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 261 %2 = icmp sgt <8 x i16> %a0, %1 262 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1 263 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 264 %5 = icmp sgt <8 x i16> %3, %4 265 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 266 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 267 %8 = icmp sgt <8 x i16> %6, %7 268 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7 269 %10 = extractelement <8 x i16> %9, i32 0 270 ret i16 %10 271} 272 273define i8 @test_reduce_v16i8(<16 x i8> %a0) { 274; X86-SSE2-LABEL: test_reduce_v16i8: 275; X86-SSE2: ## %bb.0: 276; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 277; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 278; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 279; X86-SSE2-NEXT: pand %xmm2, %xmm0 280; X86-SSE2-NEXT: pandn %xmm1, %xmm2 281; X86-SSE2-NEXT: por %xmm0, %xmm2 282; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 283; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 284; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 285; X86-SSE2-NEXT: pand %xmm1, %xmm2 286; X86-SSE2-NEXT: pandn %xmm0, %xmm1 287; X86-SSE2-NEXT: por %xmm2, %xmm1 288; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 289; X86-SSE2-NEXT: psrld $16, %xmm0 290; X86-SSE2-NEXT: movdqa 
%xmm1, %xmm2 291; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 292; X86-SSE2-NEXT: pand %xmm2, %xmm1 293; X86-SSE2-NEXT: pandn %xmm0, %xmm2 294; X86-SSE2-NEXT: por %xmm1, %xmm2 295; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 296; X86-SSE2-NEXT: psrlw $8, %xmm0 297; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 298; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 299; X86-SSE2-NEXT: pand %xmm1, %xmm2 300; X86-SSE2-NEXT: pandn %xmm0, %xmm1 301; X86-SSE2-NEXT: por %xmm2, %xmm1 302; X86-SSE2-NEXT: movd %xmm1, %eax 303; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 304; X86-SSE2-NEXT: retl 305; 306; X86-SSE42-LABEL: test_reduce_v16i8: 307; X86-SSE42: ## %bb.0: 308; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0 309; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 310; X86-SSE42-NEXT: psrlw $8, %xmm1 311; X86-SSE42-NEXT: pminub %xmm0, %xmm1 312; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 313; X86-SSE42-NEXT: movd %xmm0, %eax 314; X86-SSE42-NEXT: xorb $127, %al 315; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 316; X86-SSE42-NEXT: retl 317; 318; X86-AVX-LABEL: test_reduce_v16i8: 319; X86-AVX: ## %bb.0: 320; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0 321; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 322; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 323; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 324; X86-AVX-NEXT: vmovd %xmm0, %eax 325; X86-AVX-NEXT: xorb $127, %al 326; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 327; X86-AVX-NEXT: retl 328; 329; X64-SSE2-LABEL: test_reduce_v16i8: 330; X64-SSE2: ## %bb.0: 331; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 332; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 333; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 334; X64-SSE2-NEXT: pand %xmm2, %xmm0 335; X64-SSE2-NEXT: pandn %xmm1, %xmm2 336; X64-SSE2-NEXT: por %xmm0, %xmm2 337; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 338; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 339; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 340; X64-SSE2-NEXT: pand %xmm1, %xmm2 341; X64-SSE2-NEXT: pandn %xmm0, %xmm1 342; X64-SSE2-NEXT: por %xmm2, %xmm1 343; X64-SSE2-NEXT: 
movdqa %xmm1, %xmm0 344; X64-SSE2-NEXT: psrld $16, %xmm0 345; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 346; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 347; X64-SSE2-NEXT: pand %xmm2, %xmm1 348; X64-SSE2-NEXT: pandn %xmm0, %xmm2 349; X64-SSE2-NEXT: por %xmm1, %xmm2 350; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 351; X64-SSE2-NEXT: psrlw $8, %xmm0 352; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 353; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 354; X64-SSE2-NEXT: pand %xmm1, %xmm2 355; X64-SSE2-NEXT: pandn %xmm0, %xmm1 356; X64-SSE2-NEXT: por %xmm2, %xmm1 357; X64-SSE2-NEXT: movd %xmm1, %eax 358; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 359; X64-SSE2-NEXT: retq 360; 361; X64-SSE42-LABEL: test_reduce_v16i8: 362; X64-SSE42: ## %bb.0: 363; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 364; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 365; X64-SSE42-NEXT: psrlw $8, %xmm1 366; X64-SSE42-NEXT: pminub %xmm0, %xmm1 367; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 368; X64-SSE42-NEXT: movd %xmm0, %eax 369; X64-SSE42-NEXT: xorb $127, %al 370; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 371; X64-SSE42-NEXT: retq 372; 373; X64-AVX-LABEL: test_reduce_v16i8: 374; X64-AVX: ## %bb.0: 375; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 376; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 377; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 378; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 379; X64-AVX-NEXT: vmovd %xmm0, %eax 380; X64-AVX-NEXT: xorb $127, %al 381; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax 382; X64-AVX-NEXT: retq 383 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 384 %2 = icmp sgt <16 x i8> %a0, %1 385 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1 386 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef> 387 %5 = icmp sgt <16 x i8> %3, %4 388 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 389 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 390 %8 = icmp sgt <16 x i8> %6, %7 391 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7 392 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 393 %11 = icmp sgt <16 x i8> %9, %10 394 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10 395 %13 = extractelement <16 x i8> %12, i32 0 396 ret i8 %13 397} 398 399; 400; 256-bit Vectors 401; 402 403define i64 @test_reduce_v4i64(<4 x i64> %a0) { 404; X86-SSE2-LABEL: test_reduce_v4i64: 405; X86-SSE2: ## %bb.0: 406; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 407; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 408; X86-SSE2-NEXT: pxor %xmm2, %xmm3 409; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 410; X86-SSE2-NEXT: pxor %xmm2, %xmm4 411; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 412; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 413; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 414; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 415; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 416; X86-SSE2-NEXT: pand %xmm6, %xmm3 417; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 418; X86-SSE2-NEXT: por %xmm3, %xmm4 419; X86-SSE2-NEXT: pand %xmm4, %xmm0 420; X86-SSE2-NEXT: pandn %xmm1, %xmm4 421; X86-SSE2-NEXT: por %xmm0, %xmm4 422; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] 423; X86-SSE2-NEXT: movdqa %xmm4, %xmm1 424; X86-SSE2-NEXT: pxor %xmm2, %xmm1 425; X86-SSE2-NEXT: pxor %xmm0, %xmm2 426; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 427; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 428; X86-SSE2-NEXT: pshufd 
{{.*#+}} xmm5 = xmm3[0,0,2,2] 429; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 430; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 431; X86-SSE2-NEXT: pand %xmm5, %xmm1 432; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 433; X86-SSE2-NEXT: por %xmm1, %xmm2 434; X86-SSE2-NEXT: pand %xmm2, %xmm4 435; X86-SSE2-NEXT: pandn %xmm0, %xmm2 436; X86-SSE2-NEXT: por %xmm4, %xmm2 437; X86-SSE2-NEXT: movd %xmm2, %eax 438; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 439; X86-SSE2-NEXT: movd %xmm0, %edx 440; X86-SSE2-NEXT: retl 441; 442; X86-SSE42-LABEL: test_reduce_v4i64: 443; X86-SSE42: ## %bb.0: 444; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 445; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 446; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 447; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 448; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 449; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 450; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 451; X86-SSE42-NEXT: movd %xmm2, %eax 452; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 453; X86-SSE42-NEXT: retl 454; 455; X86-AVX1-LABEL: test_reduce_v4i64: 456; X86-AVX1: ## %bb.0: 457; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 458; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 459; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 460; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 461; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 462; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 463; X86-AVX1-NEXT: vmovd %xmm0, %eax 464; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 465; X86-AVX1-NEXT: vzeroupper 466; X86-AVX1-NEXT: retl 467; 468; X86-AVX2-LABEL: test_reduce_v4i64: 469; X86-AVX2: ## %bb.0: 470; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 471; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 472; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 473; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 474; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 475; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 476; X86-AVX2-NEXT: vmovd %xmm0, %eax 477; 
X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 478; X86-AVX2-NEXT: vzeroupper 479; X86-AVX2-NEXT: retl 480; 481; X64-SSE2-LABEL: test_reduce_v4i64: 482; X64-SSE2: ## %bb.0: 483; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 484; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 485; X64-SSE2-NEXT: pxor %xmm2, %xmm3 486; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 487; X64-SSE2-NEXT: pxor %xmm2, %xmm4 488; X64-SSE2-NEXT: movdqa %xmm4, %xmm5 489; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 490; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 491; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 492; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 493; X64-SSE2-NEXT: pand %xmm6, %xmm3 494; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 495; X64-SSE2-NEXT: por %xmm3, %xmm4 496; X64-SSE2-NEXT: pand %xmm4, %xmm0 497; X64-SSE2-NEXT: pandn %xmm1, %xmm4 498; X64-SSE2-NEXT: por %xmm0, %xmm4 499; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3] 500; X64-SSE2-NEXT: movdqa %xmm4, %xmm1 501; X64-SSE2-NEXT: pxor %xmm2, %xmm1 502; X64-SSE2-NEXT: pxor %xmm0, %xmm2 503; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 504; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 505; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 506; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 507; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 508; X64-SSE2-NEXT: pand %xmm5, %xmm1 509; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 510; X64-SSE2-NEXT: por %xmm1, %xmm2 511; X64-SSE2-NEXT: pand %xmm2, %xmm4 512; X64-SSE2-NEXT: pandn %xmm0, %xmm2 513; X64-SSE2-NEXT: por %xmm4, %xmm2 514; X64-SSE2-NEXT: movq %xmm2, %rax 515; X64-SSE2-NEXT: retq 516; 517; X64-SSE42-LABEL: test_reduce_v4i64: 518; X64-SSE42: ## %bb.0: 519; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 520; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 521; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 522; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 523; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 524; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 525; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 526; 
X64-SSE42-NEXT: movq %xmm2, %rax 527; X64-SSE42-NEXT: retq 528; 529; X64-AVX1-LABEL: test_reduce_v4i64: 530; X64-AVX1: ## %bb.0: 531; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 532; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 533; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 534; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 535; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 536; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 537; X64-AVX1-NEXT: vmovq %xmm0, %rax 538; X64-AVX1-NEXT: vzeroupper 539; X64-AVX1-NEXT: retq 540; 541; X64-AVX2-LABEL: test_reduce_v4i64: 542; X64-AVX2: ## %bb.0: 543; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 544; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 545; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 546; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 547; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 548; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 549; X64-AVX2-NEXT: vmovq %xmm0, %rax 550; X64-AVX2-NEXT: vzeroupper 551; X64-AVX2-NEXT: retq 552; 553; X64-AVX512-LABEL: test_reduce_v4i64: 554; X64-AVX512: ## %bb.0: 555; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 556; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 557; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 558; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 559; X64-AVX512-NEXT: vmovq %xmm0, %rax 560; X64-AVX512-NEXT: vzeroupper 561; X64-AVX512-NEXT: retq 562 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 563 %2 = icmp sgt <4 x i64> %a0, %1 564 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1 565 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 566 %5 = icmp sgt <4 x i64> %3, %4 567 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4 568 %7 = extractelement <4 x i64> %6, i32 0 569 ret i64 %7 570} 571 572define i32 @test_reduce_v8i32(<8 x i32> %a0) { 573; X86-SSE2-LABEL: test_reduce_v8i32: 574; X86-SSE2: ## %bb.0: 575; 
X86-SSE2-NEXT: movdqa %xmm0, %xmm2 576; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 577; X86-SSE2-NEXT: pand %xmm2, %xmm0 578; X86-SSE2-NEXT: pandn %xmm1, %xmm2 579; X86-SSE2-NEXT: por %xmm0, %xmm2 580; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 581; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 582; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 583; X86-SSE2-NEXT: pand %xmm1, %xmm2 584; X86-SSE2-NEXT: pandn %xmm0, %xmm1 585; X86-SSE2-NEXT: por %xmm2, %xmm1 586; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 587; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 588; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 589; X86-SSE2-NEXT: pand %xmm2, %xmm1 590; X86-SSE2-NEXT: pandn %xmm0, %xmm2 591; X86-SSE2-NEXT: por %xmm1, %xmm2 592; X86-SSE2-NEXT: movd %xmm2, %eax 593; X86-SSE2-NEXT: retl 594; 595; X86-SSE42-LABEL: test_reduce_v8i32: 596; X86-SSE42: ## %bb.0: 597; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 598; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 599; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 600; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 601; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 602; X86-SSE42-NEXT: movd %xmm0, %eax 603; X86-SSE42-NEXT: retl 604; 605; X86-AVX1-LABEL: test_reduce_v8i32: 606; X86-AVX1: ## %bb.0: 607; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 608; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 609; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 610; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 611; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 612; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 613; X86-AVX1-NEXT: vmovd %xmm0, %eax 614; X86-AVX1-NEXT: vzeroupper 615; X86-AVX1-NEXT: retl 616; 617; X86-AVX2-LABEL: test_reduce_v8i32: 618; X86-AVX2: ## %bb.0: 619; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 620; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 621; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 622; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 623; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 624; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 625; 
X86-AVX2-NEXT: vmovd %xmm0, %eax 626; X86-AVX2-NEXT: vzeroupper 627; X86-AVX2-NEXT: retl 628; 629; X64-SSE2-LABEL: test_reduce_v8i32: 630; X64-SSE2: ## %bb.0: 631; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 632; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 633; X64-SSE2-NEXT: pand %xmm2, %xmm0 634; X64-SSE2-NEXT: pandn %xmm1, %xmm2 635; X64-SSE2-NEXT: por %xmm0, %xmm2 636; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 637; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 638; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 639; X64-SSE2-NEXT: pand %xmm1, %xmm2 640; X64-SSE2-NEXT: pandn %xmm0, %xmm1 641; X64-SSE2-NEXT: por %xmm2, %xmm1 642; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 643; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 644; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 645; X64-SSE2-NEXT: pand %xmm2, %xmm1 646; X64-SSE2-NEXT: pandn %xmm0, %xmm2 647; X64-SSE2-NEXT: por %xmm1, %xmm2 648; X64-SSE2-NEXT: movd %xmm2, %eax 649; X64-SSE2-NEXT: retq 650; 651; X64-SSE42-LABEL: test_reduce_v8i32: 652; X64-SSE42: ## %bb.0: 653; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 654; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 655; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 656; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 657; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 658; X64-SSE42-NEXT: movd %xmm0, %eax 659; X64-SSE42-NEXT: retq 660; 661; X64-AVX1-LABEL: test_reduce_v8i32: 662; X64-AVX1: ## %bb.0: 663; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 664; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 665; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 666; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 667; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 668; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 669; X64-AVX1-NEXT: vmovd %xmm0, %eax 670; X64-AVX1-NEXT: vzeroupper 671; X64-AVX1-NEXT: retq 672; 673; X64-AVX2-LABEL: test_reduce_v8i32: 674; X64-AVX2: ## %bb.0: 675; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 676; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 677; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 678; 
X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 679; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 680; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 681; X64-AVX2-NEXT: vmovd %xmm0, %eax 682; X64-AVX2-NEXT: vzeroupper 683; X64-AVX2-NEXT: retq 684; 685; X64-AVX512-LABEL: test_reduce_v8i32: 686; X64-AVX512: ## %bb.0: 687; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 688; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 689; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 690; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 691; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 692; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 693; X64-AVX512-NEXT: vmovd %xmm0, %eax 694; X64-AVX512-NEXT: vzeroupper 695; X64-AVX512-NEXT: retq 696 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 697 %2 = icmp sgt <8 x i32> %a0, %1 698 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1 699 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 700 %5 = icmp sgt <8 x i32> %3, %4 701 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 702 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 703 %8 = icmp sgt <8 x i32> %6, %7 704 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7 705 %10 = extractelement <8 x i32> %9, i32 0 706 ret i32 %10 707} 708 709define i16 @test_reduce_v16i16(<16 x i16> %a0) { 710; X86-SSE2-LABEL: test_reduce_v16i16: 711; X86-SSE2: ## %bb.0: 712; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 713; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 714; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 715; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 716; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 717; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 718; X86-SSE2-NEXT: psrld $16, %xmm1 719; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 720; 
X86-SSE2-NEXT: movd %xmm1, %eax 721; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 722; X86-SSE2-NEXT: retl 723; 724; X86-SSE42-LABEL: test_reduce_v16i16: 725; X86-SSE42: ## %bb.0: 726; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 727; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0 728; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 729; X86-SSE42-NEXT: movd %xmm0, %eax 730; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 731; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 732; X86-SSE42-NEXT: retl 733; 734; X86-AVX1-LABEL: test_reduce_v16i16: 735; X86-AVX1: ## %bb.0: 736; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 737; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 738; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0 739; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 740; X86-AVX1-NEXT: vmovd %xmm0, %eax 741; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 742; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 743; X86-AVX1-NEXT: vzeroupper 744; X86-AVX1-NEXT: retl 745; 746; X86-AVX2-LABEL: test_reduce_v16i16: 747; X86-AVX2: ## %bb.0: 748; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 749; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 750; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0 751; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 752; X86-AVX2-NEXT: vmovd %xmm0, %eax 753; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 754; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 755; X86-AVX2-NEXT: vzeroupper 756; X86-AVX2-NEXT: retl 757; 758; X64-SSE2-LABEL: test_reduce_v16i16: 759; X64-SSE2: ## %bb.0: 760; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 761; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 762; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 763; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 764; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 765; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 766; X64-SSE2-NEXT: psrld $16, %xmm1 767; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 768; X64-SSE2-NEXT: movd %xmm1, %eax 769; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 770; X64-SSE2-NEXT: retq 771; 772; 
X64-SSE42-LABEL: test_reduce_v16i16: 773; X64-SSE42: ## %bb.0: 774; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 775; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 776; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 777; X64-SSE42-NEXT: movd %xmm0, %eax 778; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 779; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 780; X64-SSE42-NEXT: retq 781; 782; X64-AVX1-LABEL: test_reduce_v16i16: 783; X64-AVX1: ## %bb.0: 784; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 785; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 786; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 787; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 788; X64-AVX1-NEXT: vmovd %xmm0, %eax 789; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 790; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 791; X64-AVX1-NEXT: vzeroupper 792; X64-AVX1-NEXT: retq 793; 794; X64-AVX2-LABEL: test_reduce_v16i16: 795; X64-AVX2: ## %bb.0: 796; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 797; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 798; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 799; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 800; X64-AVX2-NEXT: vmovd %xmm0, %eax 801; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 802; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 803; X64-AVX2-NEXT: vzeroupper 804; X64-AVX2-NEXT: retq 805; 806; X64-AVX512-LABEL: test_reduce_v16i16: 807; X64-AVX512: ## %bb.0: 808; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 809; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 810; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 811; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 812; X64-AVX512-NEXT: vmovd %xmm0, %eax 813; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 814; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 815; X64-AVX512-NEXT: vzeroupper 816; X64-AVX512-NEXT: retq 817 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef> 818 %2 = icmp sgt <16 x i16> %a0, %1 819 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 820 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 821 %5 = icmp sgt <16 x i16> %3, %4 822 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 823 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 824 %8 = icmp sgt <16 x i16> %6, %7 825 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 826 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 827 %11 = icmp sgt <16 x i16> %9, %10 828 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 829 %13 = extractelement <16 x i16> %12, i32 0 830 ret i16 %13 831} 832 833define i8 @test_reduce_v32i8(<32 x i8> %a0) { 834; X86-SSE2-LABEL: test_reduce_v32i8: 835; X86-SSE2: ## %bb.0: 836; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 837; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 838; X86-SSE2-NEXT: pand %xmm2, %xmm0 839; X86-SSE2-NEXT: pandn %xmm1, %xmm2 840; X86-SSE2-NEXT: por %xmm0, %xmm2 841; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 842; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 843; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 844; X86-SSE2-NEXT: pand %xmm1, %xmm2 845; X86-SSE2-NEXT: pandn %xmm0, %xmm1 846; X86-SSE2-NEXT: por %xmm2, %xmm1 847; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 848; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 849; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 850; X86-SSE2-NEXT: pand %xmm2, %xmm1 851; X86-SSE2-NEXT: pandn %xmm0, %xmm2 852; 
X86-SSE2-NEXT: por %xmm1, %xmm2 853; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 854; X86-SSE2-NEXT: psrld $16, %xmm0 855; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 856; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 857; X86-SSE2-NEXT: pand %xmm1, %xmm2 858; X86-SSE2-NEXT: pandn %xmm0, %xmm1 859; X86-SSE2-NEXT: por %xmm2, %xmm1 860; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 861; X86-SSE2-NEXT: psrlw $8, %xmm0 862; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 863; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 864; X86-SSE2-NEXT: pand %xmm2, %xmm1 865; X86-SSE2-NEXT: pandn %xmm0, %xmm2 866; X86-SSE2-NEXT: por %xmm1, %xmm2 867; X86-SSE2-NEXT: movd %xmm2, %eax 868; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 869; X86-SSE2-NEXT: retl 870; 871; X86-SSE42-LABEL: test_reduce_v32i8: 872; X86-SSE42: ## %bb.0: 873; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 874; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0 875; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 876; X86-SSE42-NEXT: psrlw $8, %xmm1 877; X86-SSE42-NEXT: pminub %xmm0, %xmm1 878; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 879; X86-SSE42-NEXT: movd %xmm0, %eax 880; X86-SSE42-NEXT: xorb $127, %al 881; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 882; X86-SSE42-NEXT: retl 883; 884; X86-AVX1-LABEL: test_reduce_v32i8: 885; X86-AVX1: ## %bb.0: 886; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 887; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 888; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 889; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 890; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 891; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 892; X86-AVX1-NEXT: vmovd %xmm0, %eax 893; X86-AVX1-NEXT: xorb $127, %al 894; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 895; X86-AVX1-NEXT: vzeroupper 896; X86-AVX1-NEXT: retl 897; 898; X86-AVX2-LABEL: test_reduce_v32i8: 899; X86-AVX2: ## %bb.0: 900; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 901; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 902; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 903; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 904; X86-AVX2-NEXT: 
vpminub %xmm1, %xmm0, %xmm0 905; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 906; X86-AVX2-NEXT: vmovd %xmm0, %eax 907; X86-AVX2-NEXT: xorb $127, %al 908; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 909; X86-AVX2-NEXT: vzeroupper 910; X86-AVX2-NEXT: retl 911; 912; X64-SSE2-LABEL: test_reduce_v32i8: 913; X64-SSE2: ## %bb.0: 914; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 915; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 916; X64-SSE2-NEXT: pand %xmm2, %xmm0 917; X64-SSE2-NEXT: pandn %xmm1, %xmm2 918; X64-SSE2-NEXT: por %xmm0, %xmm2 919; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 920; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 921; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 922; X64-SSE2-NEXT: pand %xmm1, %xmm2 923; X64-SSE2-NEXT: pandn %xmm0, %xmm1 924; X64-SSE2-NEXT: por %xmm2, %xmm1 925; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 926; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 927; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 928; X64-SSE2-NEXT: pand %xmm2, %xmm1 929; X64-SSE2-NEXT: pandn %xmm0, %xmm2 930; X64-SSE2-NEXT: por %xmm1, %xmm2 931; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 932; X64-SSE2-NEXT: psrld $16, %xmm0 933; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 934; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 935; X64-SSE2-NEXT: pand %xmm1, %xmm2 936; X64-SSE2-NEXT: pandn %xmm0, %xmm1 937; X64-SSE2-NEXT: por %xmm2, %xmm1 938; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 939; X64-SSE2-NEXT: psrlw $8, %xmm0 940; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 941; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 942; X64-SSE2-NEXT: pand %xmm2, %xmm1 943; X64-SSE2-NEXT: pandn %xmm0, %xmm2 944; X64-SSE2-NEXT: por %xmm1, %xmm2 945; X64-SSE2-NEXT: movd %xmm2, %eax 946; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 947; X64-SSE2-NEXT: retq 948; 949; X64-SSE42-LABEL: test_reduce_v32i8: 950; X64-SSE42: ## %bb.0: 951; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 952; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 953; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 954; X64-SSE42-NEXT: psrlw $8, %xmm1 955; X64-SSE42-NEXT: pminub %xmm0, %xmm1 956; X64-SSE42-NEXT: 
phminposuw %xmm1, %xmm0 957; X64-SSE42-NEXT: movd %xmm0, %eax 958; X64-SSE42-NEXT: xorb $127, %al 959; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 960; X64-SSE42-NEXT: retq 961; 962; X64-AVX1-LABEL: test_reduce_v32i8: 963; X64-AVX1: ## %bb.0: 964; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 965; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 966; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 967; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 968; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 969; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 970; X64-AVX1-NEXT: vmovd %xmm0, %eax 971; X64-AVX1-NEXT: xorb $127, %al 972; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 973; X64-AVX1-NEXT: vzeroupper 974; X64-AVX1-NEXT: retq 975; 976; X64-AVX2-LABEL: test_reduce_v32i8: 977; X64-AVX2: ## %bb.0: 978; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 979; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 980; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 981; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 982; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 983; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 984; X64-AVX2-NEXT: vmovd %xmm0, %eax 985; X64-AVX2-NEXT: xorb $127, %al 986; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 987; X64-AVX2-NEXT: vzeroupper 988; X64-AVX2-NEXT: retq 989; 990; X64-AVX512-LABEL: test_reduce_v32i8: 991; X64-AVX512: ## %bb.0: 992; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 993; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 994; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 995; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 996; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 997; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 998; X64-AVX512-NEXT: vmovd %xmm0, %eax 999; X64-AVX512-NEXT: xorb $127, %al 1000; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1001; X64-AVX512-NEXT: vzeroupper 1002; X64-AVX512-NEXT: retq 1003 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 
25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1004 %2 = icmp sgt <32 x i8> %a0, %1 1005 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1006 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1007 %5 = icmp sgt <32 x i8> %3, %4 1008 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1009 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1010 %8 = icmp sgt <32 x i8> %6, %7 1011 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1012 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1013 %11 = icmp sgt <32 x i8> %9, %10 1014 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1015 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1016 %14 = icmp sgt <32 x i8> %12, %13 1017 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1018 %16 = extractelement <32 x i8> %15, i32 0 1019 ret i8 %16 1020} 1021 1022; 1023; 512-bit Vectors 1024; 1025 1026define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1027; X86-SSE2-LABEL: test_reduce_v8i64: 1028; X86-SSE2: ## %bb.0: 1029; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1030; X86-SSE2-NEXT: movdqa %xmm2, %xmm5 1031; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1032; X86-SSE2-NEXT: movdqa %xmm0, %xmm6 1033; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1034; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1035; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1036; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1037; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1038; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1039; X86-SSE2-NEXT: pand %xmm5, %xmm6 1040; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1041; X86-SSE2-NEXT: por %xmm6, %xmm5 1042; X86-SSE2-NEXT: pand %xmm5, %xmm0 1043; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1044; X86-SSE2-NEXT: por %xmm0, %xmm5 1045; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1046; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1047; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1048; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1049; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1050; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1051; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1052; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1053; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1054; X86-SSE2-NEXT: pand %xmm0, %xmm2 1055; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 1056; X86-SSE2-NEXT: por %xmm2, %xmm0 1057; X86-SSE2-NEXT: pand %xmm0, %xmm1 1058; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1059; X86-SSE2-NEXT: por %xmm1, %xmm0 1060; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1061; 
X86-SSE2-NEXT: pxor %xmm4, %xmm1 1062; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1063; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1064; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1065; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1066; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 1067; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2] 1068; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1069; X86-SSE2-NEXT: pand %xmm1, %xmm2 1070; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1071; X86-SSE2-NEXT: por %xmm2, %xmm1 1072; X86-SSE2-NEXT: pand %xmm1, %xmm5 1073; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1074; X86-SSE2-NEXT: por %xmm5, %xmm1 1075; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1076; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1077; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1078; X86-SSE2-NEXT: pxor %xmm0, %xmm4 1079; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1080; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1081; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1082; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1083; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1084; X86-SSE2-NEXT: pand %xmm2, %xmm4 1085; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1086; X86-SSE2-NEXT: por %xmm4, %xmm2 1087; X86-SSE2-NEXT: pand %xmm2, %xmm1 1088; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1089; X86-SSE2-NEXT: por %xmm1, %xmm2 1090; X86-SSE2-NEXT: movd %xmm2, %eax 1091; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1092; X86-SSE2-NEXT: movd %xmm0, %edx 1093; X86-SSE2-NEXT: retl 1094; 1095; X86-SSE42-LABEL: test_reduce_v8i64: 1096; X86-SSE42: ## %bb.0: 1097; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1098; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1099; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1100; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1101; X86-SSE42-NEXT: movdqa %xmm4, %xmm0 1102; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1103; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1104; X86-SSE42-NEXT: movapd %xmm2, %xmm0 1105; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1106; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1107; X86-SSE42-NEXT: pshufd {{.*#+}} 
xmm1 = xmm3[2,3,2,3] 1108; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1109; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1110; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1111; X86-SSE42-NEXT: movd %xmm1, %eax 1112; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1113; X86-SSE42-NEXT: retl 1114; 1115; X86-AVX1-LABEL: test_reduce_v8i64: 1116; X86-AVX1: ## %bb.0: 1117; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1118; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1119; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1120; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 1121; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm4, %xmm3, %xmm3 1122; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1123; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm1 1124; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 1125; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1126; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1127; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1128; X86-AVX1-NEXT: vmovd %xmm0, %eax 1129; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1130; X86-AVX1-NEXT: vzeroupper 1131; X86-AVX1-NEXT: retl 1132; 1133; X86-AVX2-LABEL: test_reduce_v8i64: 1134; X86-AVX2: ## %bb.0: 1135; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1136; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1137; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1138; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1139; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1140; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1141; X86-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1142; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1143; X86-AVX2-NEXT: vmovd %xmm0, %eax 1144; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1145; X86-AVX2-NEXT: vzeroupper 1146; X86-AVX2-NEXT: retl 1147; 1148; X64-SSE2-LABEL: test_reduce_v8i64: 1149; X64-SSE2: ## %bb.0: 1150; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1151; X64-SSE2-NEXT: movdqa %xmm2, %xmm5 1152; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1153; X64-SSE2-NEXT: movdqa %xmm0, %xmm6 1154; 
X64-SSE2-NEXT: pxor %xmm4, %xmm6 1155; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1156; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1157; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1158; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1159; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1160; X64-SSE2-NEXT: pand %xmm8, %xmm6 1161; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1162; X64-SSE2-NEXT: por %xmm6, %xmm5 1163; X64-SSE2-NEXT: pand %xmm5, %xmm0 1164; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1165; X64-SSE2-NEXT: por %xmm0, %xmm5 1166; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1167; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1168; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1169; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1170; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1171; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1172; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1173; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1174; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 1175; X64-SSE2-NEXT: pand %xmm7, %xmm0 1176; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1177; X64-SSE2-NEXT: por %xmm0, %xmm2 1178; X64-SSE2-NEXT: pand %xmm2, %xmm1 1179; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1180; X64-SSE2-NEXT: por %xmm1, %xmm2 1181; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1182; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1183; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1184; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1185; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 1186; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1187; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 1188; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1189; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1190; X64-SSE2-NEXT: pand %xmm6, %xmm0 1191; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1192; X64-SSE2-NEXT: por %xmm0, %xmm1 1193; X64-SSE2-NEXT: pand %xmm1, %xmm5 1194; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1195; X64-SSE2-NEXT: por %xmm5, %xmm1 1196; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1197; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1198; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1199; X64-SSE2-NEXT: pxor 
%xmm0, %xmm4 1200; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1201; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1202; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1203; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1204; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1205; X64-SSE2-NEXT: pand %xmm5, %xmm2 1206; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1207; X64-SSE2-NEXT: por %xmm2, %xmm3 1208; X64-SSE2-NEXT: pand %xmm3, %xmm1 1209; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1210; X64-SSE2-NEXT: por %xmm1, %xmm3 1211; X64-SSE2-NEXT: movq %xmm3, %rax 1212; X64-SSE2-NEXT: retq 1213; 1214; X64-SSE42-LABEL: test_reduce_v8i64: 1215; X64-SSE42: ## %bb.0: 1216; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1217; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1218; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1219; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1220; X64-SSE42-NEXT: movdqa %xmm4, %xmm0 1221; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1222; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1223; X64-SSE42-NEXT: movapd %xmm2, %xmm0 1224; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1225; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1226; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1227; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1228; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1229; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1230; X64-SSE42-NEXT: movq %xmm1, %rax 1231; X64-SSE42-NEXT: retq 1232; 1233; X64-AVX1-LABEL: test_reduce_v8i64: 1234; X64-AVX1: ## %bb.0: 1235; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1236; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1237; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1238; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 1239; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm4, %xmm3, %xmm3 1240; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1241; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm1 1242; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 1243; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1244; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1245; X64-AVX1-NEXT: vblendvpd 
%xmm2, %xmm0, %xmm1, %xmm0 1246; X64-AVX1-NEXT: vmovq %xmm0, %rax 1247; X64-AVX1-NEXT: vzeroupper 1248; X64-AVX1-NEXT: retq 1249; 1250; X64-AVX2-LABEL: test_reduce_v8i64: 1251; X64-AVX2: ## %bb.0: 1252; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1253; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1254; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1255; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1256; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1257; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1258; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1259; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1260; X64-AVX2-NEXT: vmovq %xmm0, %rax 1261; X64-AVX2-NEXT: vzeroupper 1262; X64-AVX2-NEXT: retq 1263; 1264; X64-AVX512-LABEL: test_reduce_v8i64: 1265; X64-AVX512: ## %bb.0: 1266; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1267; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 1268; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1269; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 1270; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1271; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 1272; X64-AVX512-NEXT: vmovq %xmm0, %rax 1273; X64-AVX512-NEXT: vzeroupper 1274; X64-AVX512-NEXT: retq 1275 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1276 %2 = icmp sgt <8 x i64> %a0, %1 1277 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1278 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1279 %5 = icmp sgt <8 x i64> %3, %4 1280 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1281 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1282 %8 = icmp sgt <8 x i64> %6, %7 1283 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1284 %10 = extractelement <8 x i64> %9, i32 0 1285 
ret i64 %10 1286} 1287 1288define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1289; X86-SSE2-LABEL: test_reduce_v16i32: 1290; X86-SSE2: ## %bb.0: 1291; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1292; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1293; X86-SSE2-NEXT: pand %xmm4, %xmm1 1294; X86-SSE2-NEXT: pandn %xmm3, %xmm4 1295; X86-SSE2-NEXT: por %xmm1, %xmm4 1296; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1297; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1298; X86-SSE2-NEXT: pand %xmm1, %xmm0 1299; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1300; X86-SSE2-NEXT: por %xmm0, %xmm1 1301; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1302; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1303; X86-SSE2-NEXT: pand %xmm0, %xmm1 1304; X86-SSE2-NEXT: pandn %xmm4, %xmm0 1305; X86-SSE2-NEXT: por %xmm1, %xmm0 1306; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1307; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1308; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1309; X86-SSE2-NEXT: pand %xmm2, %xmm0 1310; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1311; X86-SSE2-NEXT: por %xmm0, %xmm2 1312; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1313; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1314; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1315; X86-SSE2-NEXT: pand %xmm1, %xmm2 1316; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1317; X86-SSE2-NEXT: por %xmm2, %xmm1 1318; X86-SSE2-NEXT: movd %xmm1, %eax 1319; X86-SSE2-NEXT: retl 1320; 1321; X86-SSE42-LABEL: test_reduce_v16i32: 1322; X86-SSE42: ## %bb.0: 1323; X86-SSE42-NEXT: pmaxsd %xmm3, %xmm1 1324; X86-SSE42-NEXT: pmaxsd %xmm2, %xmm1 1325; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1326; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1327; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1328; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1329; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1330; X86-SSE42-NEXT: movd %xmm1, %eax 1331; X86-SSE42-NEXT: retl 1332; 1333; X86-AVX1-LABEL: test_reduce_v16i32: 1334; X86-AVX1: ## %bb.0: 1335; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1336; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1337; X86-AVX1-NEXT: vpmaxsd 
%xmm2, %xmm3, %xmm2 1338; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm1, %xmm1 1339; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1340; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1341; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1342; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1343; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1344; X86-AVX1-NEXT: vmovd %xmm0, %eax 1345; X86-AVX1-NEXT: vzeroupper 1346; X86-AVX1-NEXT: retl 1347; 1348; X86-AVX2-LABEL: test_reduce_v16i32: 1349; X86-AVX2: ## %bb.0: 1350; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1351; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1352; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1353; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1354; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1355; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1356; X86-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1357; X86-AVX2-NEXT: vmovd %xmm0, %eax 1358; X86-AVX2-NEXT: vzeroupper 1359; X86-AVX2-NEXT: retl 1360; 1361; X64-SSE2-LABEL: test_reduce_v16i32: 1362; X64-SSE2: ## %bb.0: 1363; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1364; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1365; X64-SSE2-NEXT: pand %xmm4, %xmm1 1366; X64-SSE2-NEXT: pandn %xmm3, %xmm4 1367; X64-SSE2-NEXT: por %xmm1, %xmm4 1368; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1369; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1370; X64-SSE2-NEXT: pand %xmm1, %xmm0 1371; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1372; X64-SSE2-NEXT: por %xmm0, %xmm1 1373; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1374; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1375; X64-SSE2-NEXT: pand %xmm0, %xmm1 1376; X64-SSE2-NEXT: pandn %xmm4, %xmm0 1377; X64-SSE2-NEXT: por %xmm1, %xmm0 1378; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1379; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1380; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1381; X64-SSE2-NEXT: pand %xmm2, %xmm0 1382; X64-SSE2-NEXT: pandn %xmm1, %xmm2 1383; X64-SSE2-NEXT: por %xmm0, %xmm2 1384; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1385; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 
1386; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1387; X64-SSE2-NEXT: pand %xmm1, %xmm2 1388; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1389; X64-SSE2-NEXT: por %xmm2, %xmm1 1390; X64-SSE2-NEXT: movd %xmm1, %eax 1391; X64-SSE2-NEXT: retq 1392; 1393; X64-SSE42-LABEL: test_reduce_v16i32: 1394; X64-SSE42: ## %bb.0: 1395; X64-SSE42-NEXT: pmaxsd %xmm3, %xmm1 1396; X64-SSE42-NEXT: pmaxsd %xmm2, %xmm1 1397; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1398; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1399; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1400; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1401; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1402; X64-SSE42-NEXT: movd %xmm1, %eax 1403; X64-SSE42-NEXT: retq 1404; 1405; X64-AVX1-LABEL: test_reduce_v16i32: 1406; X64-AVX1: ## %bb.0: 1407; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1408; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1409; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 1410; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm1, %xmm1 1411; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1412; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1413; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1414; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1415; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1416; X64-AVX1-NEXT: vmovd %xmm0, %eax 1417; X64-AVX1-NEXT: vzeroupper 1418; X64-AVX1-NEXT: retq 1419; 1420; X64-AVX2-LABEL: test_reduce_v16i32: 1421; X64-AVX2: ## %bb.0: 1422; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1423; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1424; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1425; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1426; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1427; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1428; X64-AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1429; X64-AVX2-NEXT: vmovd %xmm0, %eax 1430; X64-AVX2-NEXT: vzeroupper 1431; X64-AVX2-NEXT: retq 1432; 1433; X64-AVX512-LABEL: test_reduce_v16i32: 1434; X64-AVX512: ## %bb.0: 1435; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 
1436; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1437; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1438; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1439; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1440; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1441; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1442; X64-AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1443; X64-AVX512-NEXT: vmovd %xmm0, %eax 1444; X64-AVX512-NEXT: vzeroupper 1445; X64-AVX512-NEXT: retq 1446 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1447 %2 = icmp sgt <16 x i32> %a0, %1 1448 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1449 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1450 %5 = icmp sgt <16 x i32> %3, %4 1451 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1452 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1453 %8 = icmp sgt <16 x i32> %6, %7 1454 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1455 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1456 %11 = icmp sgt <16 x i32> %9, %10 1457 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1458 %13 = extractelement <16 x i32> %12, i32 0 1459 ret i32 %13 1460} 1461 1462define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1463; X86-SSE2-LABEL: test_reduce_v32i16: 1464; X86-SSE2: ## 
%bb.0: 1465; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1 1466; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm1 1467; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1468; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1469; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1470; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1471; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1472; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1473; X86-SSE2-NEXT: psrld $16, %xmm0 1474; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1475; X86-SSE2-NEXT: movd %xmm0, %eax 1476; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1477; X86-SSE2-NEXT: retl 1478; 1479; X86-SSE42-LABEL: test_reduce_v32i16: 1480; X86-SSE42: ## %bb.0: 1481; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1 1482; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm1 1483; X86-SSE42-NEXT: pmaxsw %xmm0, %xmm1 1484; X86-SSE42-NEXT: pxor LCPI10_0, %xmm1 1485; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1486; X86-SSE42-NEXT: movd %xmm0, %eax 1487; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1488; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1489; X86-SSE42-NEXT: retl 1490; 1491; X86-AVX1-LABEL: test_reduce_v32i16: 1492; X86-AVX1: ## %bb.0: 1493; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1494; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1495; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1496; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm1, %xmm1 1497; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1498; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 1499; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1500; X86-AVX1-NEXT: vmovd %xmm0, %eax 1501; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1502; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1503; X86-AVX1-NEXT: vzeroupper 1504; X86-AVX1-NEXT: retl 1505; 1506; X86-AVX2-LABEL: test_reduce_v32i16: 1507; X86-AVX2: ## %bb.0: 1508; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1509; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1510; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1511; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 1512; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 
1513; X86-AVX2-NEXT: vmovd %xmm0, %eax 1514; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1515; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1516; X86-AVX2-NEXT: vzeroupper 1517; X86-AVX2-NEXT: retl 1518; 1519; X64-SSE2-LABEL: test_reduce_v32i16: 1520; X64-SSE2: ## %bb.0: 1521; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1 1522; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm1 1523; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1524; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1525; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1526; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1527; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1528; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1529; X64-SSE2-NEXT: psrld $16, %xmm0 1530; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1531; X64-SSE2-NEXT: movd %xmm0, %eax 1532; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1533; X64-SSE2-NEXT: retq 1534; 1535; X64-SSE42-LABEL: test_reduce_v32i16: 1536; X64-SSE42: ## %bb.0: 1537; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1 1538; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm1 1539; X64-SSE42-NEXT: pmaxsw %xmm0, %xmm1 1540; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm1 1541; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1542; X64-SSE42-NEXT: movd %xmm0, %eax 1543; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1544; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1545; X64-SSE42-NEXT: retq 1546; 1547; X64-AVX1-LABEL: test_reduce_v32i16: 1548; X64-AVX1: ## %bb.0: 1549; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1550; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1551; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1552; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm1, %xmm1 1553; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1554; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1555; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1556; X64-AVX1-NEXT: vmovd %xmm0, %eax 1557; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1558; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1559; X64-AVX1-NEXT: vzeroupper 1560; X64-AVX1-NEXT: retq 1561; 1562; X64-AVX2-LABEL: 
test_reduce_v32i16: 1563; X64-AVX2: ## %bb.0: 1564; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1565; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1566; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1567; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1568; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1569; X64-AVX2-NEXT: vmovd %xmm0, %eax 1570; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1571; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1572; X64-AVX2-NEXT: vzeroupper 1573; X64-AVX2-NEXT: retq 1574; 1575; X64-AVX512-LABEL: test_reduce_v32i16: 1576; X64-AVX512: ## %bb.0: 1577; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1578; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1579; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1580; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1581; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1582; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1583; X64-AVX512-NEXT: vmovd %xmm0, %eax 1584; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1585; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1586; X64-AVX512-NEXT: vzeroupper 1587; X64-AVX512-NEXT: retq 1588 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1589 %2 = icmp sgt <32 x i16> %a0, %1 1590 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1591 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1592 %5 = 
icmp sgt <32 x i16> %3, %4 1593 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1594 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1595 %8 = icmp sgt <32 x i16> %6, %7 1596 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1597 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1598 %11 = icmp sgt <32 x i16> %9, %10 1599 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1600 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1601 %14 = icmp sgt <32 x i16> %12, %13 1602 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1603 %16 = extractelement <32 x i16> %15, i32 0 1604 ret i16 %16 1605} 1606 1607define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1608; X86-SSE2-LABEL: test_reduce_v64i8: 1609; X86-SSE2: ## %bb.0: 1610; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1611; X86-SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1612; X86-SSE2-NEXT: pand %xmm4, %xmm1 1613; X86-SSE2-NEXT: pandn %xmm3, %xmm4 1614; 
X86-SSE2-NEXT: por %xmm1, %xmm4 1615; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1616; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1617; X86-SSE2-NEXT: pand %xmm1, %xmm0 1618; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1619; X86-SSE2-NEXT: por %xmm0, %xmm1 1620; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1621; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1622; X86-SSE2-NEXT: pand %xmm0, %xmm1 1623; X86-SSE2-NEXT: pandn %xmm4, %xmm0 1624; X86-SSE2-NEXT: por %xmm1, %xmm0 1625; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1626; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1627; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1628; X86-SSE2-NEXT: pand %xmm2, %xmm0 1629; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1630; X86-SSE2-NEXT: por %xmm0, %xmm2 1631; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1632; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1633; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1634; X86-SSE2-NEXT: pand %xmm1, %xmm2 1635; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1636; X86-SSE2-NEXT: por %xmm2, %xmm1 1637; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1638; X86-SSE2-NEXT: psrld $16, %xmm0 1639; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1640; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1641; X86-SSE2-NEXT: pand %xmm2, %xmm1 1642; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1643; X86-SSE2-NEXT: por %xmm1, %xmm2 1644; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 1645; X86-SSE2-NEXT: psrlw $8, %xmm0 1646; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1647; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1648; X86-SSE2-NEXT: pand %xmm1, %xmm2 1649; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1650; X86-SSE2-NEXT: por %xmm2, %xmm1 1651; X86-SSE2-NEXT: movd %xmm1, %eax 1652; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1653; X86-SSE2-NEXT: retl 1654; 1655; X86-SSE42-LABEL: test_reduce_v64i8: 1656; X86-SSE42: ## %bb.0: 1657; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1 1658; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm1 1659; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1 1660; X86-SSE42-NEXT: pxor LCPI11_0, %xmm1 1661; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1662; X86-SSE42-NEXT: psrlw $8, %xmm0 1663; X86-SSE42-NEXT: pminub %xmm1, %xmm0 
1664; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1665; X86-SSE42-NEXT: movd %xmm0, %eax 1666; X86-SSE42-NEXT: xorb $127, %al 1667; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1668; X86-SSE42-NEXT: retl 1669; 1670; X86-AVX1-LABEL: test_reduce_v64i8: 1671; X86-AVX1: ## %bb.0: 1672; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1673; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1674; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1675; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm1, %xmm1 1676; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1677; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 1678; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1679; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1680; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1681; X86-AVX1-NEXT: vmovd %xmm0, %eax 1682; X86-AVX1-NEXT: xorb $127, %al 1683; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1684; X86-AVX1-NEXT: vzeroupper 1685; X86-AVX1-NEXT: retl 1686; 1687; X86-AVX2-LABEL: test_reduce_v64i8: 1688; X86-AVX2: ## %bb.0: 1689; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1690; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1691; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1692; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 1693; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1694; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1695; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1696; X86-AVX2-NEXT: vmovd %xmm0, %eax 1697; X86-AVX2-NEXT: xorb $127, %al 1698; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1699; X86-AVX2-NEXT: vzeroupper 1700; X86-AVX2-NEXT: retl 1701; 1702; X64-SSE2-LABEL: test_reduce_v64i8: 1703; X64-SSE2: ## %bb.0: 1704; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1705; X64-SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1706; X64-SSE2-NEXT: pand %xmm4, %xmm1 1707; X64-SSE2-NEXT: pandn %xmm3, %xmm4 1708; X64-SSE2-NEXT: por %xmm1, %xmm4 1709; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1710; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1711; X64-SSE2-NEXT: pand %xmm1, %xmm0 1712; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1713; X64-SSE2-NEXT: por %xmm0, %xmm1 
1714; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1715; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1716; X64-SSE2-NEXT: pand %xmm0, %xmm1 1717; X64-SSE2-NEXT: pandn %xmm4, %xmm0 1718; X64-SSE2-NEXT: por %xmm1, %xmm0 1719; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1720; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1721; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1722; X64-SSE2-NEXT: pand %xmm2, %xmm0 1723; X64-SSE2-NEXT: pandn %xmm1, %xmm2 1724; X64-SSE2-NEXT: por %xmm0, %xmm2 1725; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1726; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1727; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1728; X64-SSE2-NEXT: pand %xmm1, %xmm2 1729; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1730; X64-SSE2-NEXT: por %xmm2, %xmm1 1731; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1732; X64-SSE2-NEXT: psrld $16, %xmm0 1733; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1734; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1735; X64-SSE2-NEXT: pand %xmm2, %xmm1 1736; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1737; X64-SSE2-NEXT: por %xmm1, %xmm2 1738; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1739; X64-SSE2-NEXT: psrlw $8, %xmm0 1740; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1741; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1742; X64-SSE2-NEXT: pand %xmm1, %xmm2 1743; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1744; X64-SSE2-NEXT: por %xmm2, %xmm1 1745; X64-SSE2-NEXT: movd %xmm1, %eax 1746; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1747; X64-SSE2-NEXT: retq 1748; 1749; X64-SSE42-LABEL: test_reduce_v64i8: 1750; X64-SSE42: ## %bb.0: 1751; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1 1752; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm1 1753; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1 1754; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm1 1755; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1756; X64-SSE42-NEXT: psrlw $8, %xmm0 1757; X64-SSE42-NEXT: pminub %xmm1, %xmm0 1758; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1759; X64-SSE42-NEXT: movd %xmm0, %eax 1760; X64-SSE42-NEXT: xorb $127, %al 1761; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1762; X64-SSE42-NEXT: retq 1763; 1764; 
X64-AVX1-LABEL: test_reduce_v64i8: 1765; X64-AVX1: ## %bb.0: 1766; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1767; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1768; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1769; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm1, %xmm1 1770; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1771; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1772; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1773; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1774; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1775; X64-AVX1-NEXT: vmovd %xmm0, %eax 1776; X64-AVX1-NEXT: xorb $127, %al 1777; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1778; X64-AVX1-NEXT: vzeroupper 1779; X64-AVX1-NEXT: retq 1780; 1781; X64-AVX2-LABEL: test_reduce_v64i8: 1782; X64-AVX2: ## %bb.0: 1783; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1784; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1785; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1786; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1787; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1788; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1789; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1790; X64-AVX2-NEXT: vmovd %xmm0, %eax 1791; X64-AVX2-NEXT: xorb $127, %al 1792; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1793; X64-AVX2-NEXT: vzeroupper 1794; X64-AVX2-NEXT: retq 1795; 1796; X64-AVX512-LABEL: test_reduce_v64i8: 1797; X64-AVX512: ## %bb.0: 1798; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1799; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1800; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1801; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1802; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1803; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1804; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1805; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1806; X64-AVX512-NEXT: vmovd %xmm0, %eax 1807; X64-AVX512-NEXT: xorb $127, %al 1808; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1809; X64-AVX512-NEXT: vzeroupper 1810; X64-AVX512-NEXT: retq 
1811 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1812 %2 = icmp sgt <64 x i8> %a0, %1 1813 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1814 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1815 %5 = icmp sgt <64 x i8> %3, %4 1816 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1817 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1818 %8 = icmp sgt <64 x i8> %6, %7 1819 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1820 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1821 %11 = icmp sgt <64 x i8> %9, %10 1822 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1823 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef> 1824 %14 = icmp sgt <64 x i8> %12, %13 1825 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1826 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1827 %17 = icmp sgt <64 x i8> %15, %16 1828 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1829 %19 = extractelement <64 x i8> %18, i32 0 1830 ret i8 %19 1831} 1832 1833; 1834; Partial Vector Reductions 1835; 1836 1837define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1838; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1839; X86-SSE2: ## %bb.0: 1840; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1841; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1842; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1843; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1844; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1845; X86-SSE2-NEXT: psrld $16, %xmm1 1846; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1847; X86-SSE2-NEXT: movd %xmm1, %eax 1848; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1849; X86-SSE2-NEXT: retl 1850; 1851; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 1852; X86-SSE42: ## %bb.0: 1853; X86-SSE42-NEXT: pxor LCPI12_0, %xmm0 1854; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1855; X86-SSE42-NEXT: movd %xmm0, %eax 1856; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1857; 
X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1858; X86-SSE42-NEXT: retl 1859; 1860; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 1861; X86-AVX: ## %bb.0: 1862; X86-AVX-NEXT: vpxor LCPI12_0, %xmm0, %xmm0 1863; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1864; X86-AVX-NEXT: vmovd %xmm0, %eax 1865; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1866; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1867; X86-AVX-NEXT: vzeroupper 1868; X86-AVX-NEXT: retl 1869; 1870; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: 1871; X64-SSE2: ## %bb.0: 1872; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1873; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1874; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1875; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1876; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1877; X64-SSE2-NEXT: psrld $16, %xmm1 1878; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1879; X64-SSE2-NEXT: movd %xmm1, %eax 1880; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1881; X64-SSE2-NEXT: retq 1882; 1883; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 1884; X64-SSE42: ## %bb.0: 1885; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 1886; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1887; X64-SSE42-NEXT: movd %xmm0, %eax 1888; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1889; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1890; X64-SSE42-NEXT: retq 1891; 1892; X64-AVX-LABEL: test_reduce_v16i16_v8i16: 1893; X64-AVX: ## %bb.0: 1894; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1895; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 1896; X64-AVX-NEXT: vmovd %xmm0, %eax 1897; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1898; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1899; X64-AVX-NEXT: vzeroupper 1900; X64-AVX-NEXT: retq 1901 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1902 %2 = icmp sgt <16 x i16> %a0, %1 
1903 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 1904 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1905 %5 = icmp sgt <16 x i16> %3, %4 1906 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 1907 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1908 %8 = icmp sgt <16 x i16> %6, %7 1909 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 1910 %10 = extractelement <16 x i16> %9, i32 0 1911 ret i16 %10 1912} 1913 1914define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 1915; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 1916; X86-SSE2: ## %bb.0: 1917; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1918; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1919; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1920; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1921; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1922; X86-SSE2-NEXT: psrld $16, %xmm1 1923; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1924; X86-SSE2-NEXT: movd %xmm1, %eax 1925; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1926; X86-SSE2-NEXT: retl 1927; 1928; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: 1929; X86-SSE42: ## %bb.0: 1930; X86-SSE42-NEXT: pxor LCPI13_0, %xmm0 1931; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1932; X86-SSE42-NEXT: movd %xmm0, %eax 1933; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1934; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1935; X86-SSE42-NEXT: retl 1936; 1937; X86-AVX-LABEL: test_reduce_v32i16_v8i16: 1938; X86-AVX: ## %bb.0: 1939; X86-AVX-NEXT: vpxor LCPI13_0, %xmm0, %xmm0 1940; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1941; X86-AVX-NEXT: vmovd %xmm0, %eax 1942; X86-AVX-NEXT: xorl $32767, %eax ## imm = 
0x7FFF 1943; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1944; X86-AVX-NEXT: vzeroupper 1945; X86-AVX-NEXT: retl 1946; 1947; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: 1948; X64-SSE2: ## %bb.0: 1949; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1950; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1951; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1952; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1953; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1954; X64-SSE2-NEXT: psrld $16, %xmm1 1955; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1956; X64-SSE2-NEXT: movd %xmm1, %eax 1957; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1958; X64-SSE2-NEXT: retq 1959; 1960; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: 1961; X64-SSE42: ## %bb.0: 1962; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 1963; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1964; X64-SSE42-NEXT: movd %xmm0, %eax 1965; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1966; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1967; X64-SSE42-NEXT: retq 1968; 1969; X64-AVX-LABEL: test_reduce_v32i16_v8i16: 1970; X64-AVX: ## %bb.0: 1971; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1972; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 1973; X64-AVX-NEXT: vmovd %xmm0, %eax 1974; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF 1975; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1976; X64-AVX-NEXT: vzeroupper 1977; X64-AVX-NEXT: retq 1978 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1979 %2 = icmp sgt <32 x i16> %a0, %1 1980 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1981 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1982 %5 = icmp sgt <32 x i16> %3, %4 1983 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1984 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1985 %8 = icmp sgt <32 x i16> %6, %7 1986 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1987 %10 = extractelement <32 x i16> %9, i32 0 1988 ret i16 %10 1989} 1990 1991define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { 1992; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: 1993; X86-SSE2: ## %bb.0: 1994; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1995; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1996; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1997; X86-SSE2-NEXT: pand %xmm2, %xmm0 1998; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1999; X86-SSE2-NEXT: por %xmm0, %xmm2 2000; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 2001; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 2002; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2003; X86-SSE2-NEXT: pand %xmm1, %xmm2 2004; X86-SSE2-NEXT: pandn %xmm0, %xmm1 2005; X86-SSE2-NEXT: por %xmm2, %xmm1 2006; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2007; X86-SSE2-NEXT: psrld $16, %xmm0 2008; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 2009; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2010; X86-SSE2-NEXT: pand %xmm2, %xmm1 2011; X86-SSE2-NEXT: pandn %xmm0, %xmm2 2012; X86-SSE2-NEXT: por %xmm1, %xmm2 2013; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 2014; X86-SSE2-NEXT: psrlw $8, %xmm0 
2015; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 2016; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2017; X86-SSE2-NEXT: pand %xmm1, %xmm2 2018; X86-SSE2-NEXT: pandn %xmm0, %xmm1 2019; X86-SSE2-NEXT: por %xmm2, %xmm1 2020; X86-SSE2-NEXT: movd %xmm1, %eax 2021; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2022; X86-SSE2-NEXT: retl 2023; 2024; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: 2025; X86-SSE42: ## %bb.0: 2026; X86-SSE42-NEXT: pxor LCPI14_0, %xmm0 2027; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 2028; X86-SSE42-NEXT: psrlw $8, %xmm1 2029; X86-SSE42-NEXT: pminub %xmm0, %xmm1 2030; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2031; X86-SSE42-NEXT: movd %xmm0, %eax 2032; X86-SSE42-NEXT: xorb $127, %al 2033; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2034; X86-SSE42-NEXT: retl 2035; 2036; X86-AVX-LABEL: test_reduce_v32i8_v16i8: 2037; X86-AVX: ## %bb.0: 2038; X86-AVX-NEXT: vpxor LCPI14_0, %xmm0, %xmm0 2039; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2040; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2041; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2042; X86-AVX-NEXT: vmovd %xmm0, %eax 2043; X86-AVX-NEXT: xorb $127, %al 2044; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 2045; X86-AVX-NEXT: vzeroupper 2046; X86-AVX-NEXT: retl 2047; 2048; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: 2049; X64-SSE2: ## %bb.0: 2050; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2051; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2052; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 2053; X64-SSE2-NEXT: pand %xmm2, %xmm0 2054; X64-SSE2-NEXT: pandn %xmm1, %xmm2 2055; X64-SSE2-NEXT: por %xmm0, %xmm2 2056; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 2057; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 2058; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2059; X64-SSE2-NEXT: pand %xmm1, %xmm2 2060; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2061; X64-SSE2-NEXT: por %xmm2, %xmm1 2062; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 2063; X64-SSE2-NEXT: psrld $16, %xmm0 2064; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 2065; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2066; 
X64-SSE2-NEXT: pand %xmm2, %xmm1 2067; X64-SSE2-NEXT: pandn %xmm0, %xmm2 2068; X64-SSE2-NEXT: por %xmm1, %xmm2 2069; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 2070; X64-SSE2-NEXT: psrlw $8, %xmm0 2071; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 2072; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 2073; X64-SSE2-NEXT: pand %xmm1, %xmm2 2074; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2075; X64-SSE2-NEXT: por %xmm2, %xmm1 2076; X64-SSE2-NEXT: movd %xmm1, %eax 2077; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2078; X64-SSE2-NEXT: retq 2079; 2080; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: 2081; X64-SSE42: ## %bb.0: 2082; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 2083; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 2084; X64-SSE42-NEXT: psrlw $8, %xmm1 2085; X64-SSE42-NEXT: pminub %xmm0, %xmm1 2086; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2087; X64-SSE42-NEXT: movd %xmm0, %eax 2088; X64-SSE42-NEXT: xorb $127, %al 2089; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2090; X64-SSE42-NEXT: retq 2091; 2092; X64-AVX-LABEL: test_reduce_v32i8_v16i8: 2093; X64-AVX: ## %bb.0: 2094; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 2095; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2096; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2097; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 2098; X64-AVX-NEXT: vmovd %xmm0, %eax 2099; X64-AVX-NEXT: xorb $127, %al 2100; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax 2101; X64-AVX-NEXT: vzeroupper 2102; X64-AVX-NEXT: retq 2103 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2104 %2 = icmp sgt <32 x i8> %a0, %1 2105 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 2106 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp sgt <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp sgt <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

; Signed-max tree reduction over the low 16 bytes of a <64 x i8> argument:
; each step is an "icmp sgt" + "select" against a halved shuffle, and every
; shuffle mask leaves the upper 48 lanes undef, so only one 128-bit vector is
; live.  The SSE2 lowerings keep the pcmpgtb/pand/pandn/por halving ladder;
; the SSE4.2/AVX lowerings instead invert the input with a constant-pool XOR
; (presumably a splat of 127 - the trailing "xorb $127, %al" undoes it; see
; LCPI15_0) and use pminub+phminposuw, recovering smax from an unsigned min.
; Assertions below are autogenerated - regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor LCPI15_0, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $127, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor LCPI15_0, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorb $127, %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $127, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorb $127, %al
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp sgt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp sgt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp sgt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp sgt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}