; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=XOPAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512DQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
;
; 32-bit runs to make sure we do reasonable things for i64 shifts.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2

;
; Variable Shifts
;

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshlq %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: var_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: var_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; X86-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v4i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshld %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: var_shift_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: var_shift_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v8i32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; X86-AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v8i32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vpshlw %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: var_shift_v16i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: var_shift_v16i16:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQVL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQVL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: var_shift_v16i16:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v16i16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
; X86-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; X86-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; X86-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
; X86-AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
; X86-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
; X86-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; X86-AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v16i16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X86-AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X86-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shift_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: var_shift_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOPAVX1-NEXT:    vpshlb %xmm2, %xmm3, %xmm2
; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: var_shift_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; XOPAVX2-NEXT:    vpshlb %xmm2, %xmm3, %xmm2
; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: var_shift_v32i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: var_shift_v32i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQVL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQVL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQVL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQVL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQVL-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: var_shift_v32i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: var_shift_v32i8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; X86-AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: var_shift_v32i8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %shift = shl <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v4i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v8i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v8i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v8i32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v8i32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; X86-AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v16i16:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v16i16:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOPAVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: splatvar_shift_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_shift_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v16i16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v16i16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_shift_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_shift_v32i8:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_shift_v32i8:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512DQ-LABEL: splatvar_shift_v32i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQVL-LABEL: splatvar_shift_v32i8:
; AVX512DQVL:       # %bb.0:
; AVX512DQVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQVL-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQVL-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX512DQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT:    retq
;
; AVX512BWVL-LABEL: splatvar_shift_v32i8:
; AVX512BWVL:       # %bb.0:
; AVX512BWVL-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BWVL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT:    retq
;
; X86-AVX1-LABEL: splatvar_shift_v32i8:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; X86-AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: splatvar_shift_v32i8:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X86-AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; X86-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
; X86-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
; AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_shift_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; XOPAVX1-LABEL: constant_shift_v4i64:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm1
; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT:    vpshlq {{.*}}(%rip), %xmm0, %xmm0
; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: constant_shift_v4i64:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; XOPAVX2-NEXT:    retq
;
; AVX512-LABEL: constant_shift_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: constant_shift_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; X86-AVX1-LABEL: constant_shift_v4i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
; X86-AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
;
X86-AVX1-NEXT: retl 800; 801; X86-AVX2-LABEL: constant_shift_v4i64: 802; X86-AVX2: # %bb.0: 803; X86-AVX2-NEXT: vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0 804; X86-AVX2-NEXT: retl 805 %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62> 806 ret <4 x i64> %shift 807} 808 809define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { 810; AVX1-LABEL: constant_shift_v8i32: 811; AVX1: # %bb.0: 812; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1 813; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 814; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 815; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 816; AVX1-NEXT: retq 817; 818; AVX2-LABEL: constant_shift_v8i32: 819; AVX2: # %bb.0: 820; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 821; AVX2-NEXT: retq 822; 823; XOPAVX1-LABEL: constant_shift_v8i32: 824; XOPAVX1: # %bb.0: 825; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1 826; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 827; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0 828; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 829; XOPAVX1-NEXT: retq 830; 831; XOPAVX2-LABEL: constant_shift_v8i32: 832; XOPAVX2: # %bb.0: 833; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 834; XOPAVX2-NEXT: retq 835; 836; AVX512-LABEL: constant_shift_v8i32: 837; AVX512: # %bb.0: 838; AVX512-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 839; AVX512-NEXT: retq 840; 841; AVX512VL-LABEL: constant_shift_v8i32: 842; AVX512VL: # %bb.0: 843; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 844; AVX512VL-NEXT: retq 845; 846; X86-AVX1-LABEL: constant_shift_v8i32: 847; X86-AVX1: # %bb.0: 848; X86-AVX1-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm1 849; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 850; X86-AVX1-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 851; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 852; X86-AVX1-NEXT: retl 853; 854; X86-AVX2-LABEL: constant_shift_v8i32: 855; X86-AVX2: # %bb.0: 856; X86-AVX2-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 857; X86-AVX2-NEXT: retl 858 %shift = shl <8 x i32> %a, <i32 
4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7> 859 ret <8 x i32> %shift 860} 861 862define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { 863; AVX1-LABEL: constant_shift_v16i16: 864; AVX1: # %bb.0: 865; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1 866; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 867; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 868; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 869; AVX1-NEXT: retq 870; 871; AVX2-LABEL: constant_shift_v16i16: 872; AVX2: # %bb.0: 873; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 874; AVX2-NEXT: retq 875; 876; XOPAVX1-LABEL: constant_shift_v16i16: 877; XOPAVX1: # %bb.0: 878; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1 879; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 880; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0 881; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 882; XOPAVX1-NEXT: retq 883; 884; XOPAVX2-LABEL: constant_shift_v16i16: 885; XOPAVX2: # %bb.0: 886; XOPAVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 887; XOPAVX2-NEXT: retq 888; 889; AVX512DQ-LABEL: constant_shift_v16i16: 890; AVX512DQ: # %bb.0: 891; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 892; AVX512DQ-NEXT: retq 893; 894; AVX512BW-LABEL: constant_shift_v16i16: 895; AVX512BW: # %bb.0: 896; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 897; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 898; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 899; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 900; AVX512BW-NEXT: retq 901; 902; AVX512DQVL-LABEL: constant_shift_v16i16: 903; AVX512DQVL: # %bb.0: 904; AVX512DQVL-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 905; AVX512DQVL-NEXT: retq 906; 907; AVX512BWVL-LABEL: constant_shift_v16i16: 908; AVX512BWVL: # %bb.0: 909; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 910; AVX512BWVL-NEXT: retq 911; 912; X86-AVX1-LABEL: constant_shift_v16i16: 913; X86-AVX1: # %bb.0: 914; X86-AVX1-NEXT: vpmullw {{\.LCPI.*}}, %xmm0, %xmm1 915; 
X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 916; X86-AVX1-NEXT: vpmullw {{\.LCPI.*}}, %xmm0, %xmm0 917; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 918; X86-AVX1-NEXT: retl 919; 920; X86-AVX2-LABEL: constant_shift_v16i16: 921; X86-AVX2: # %bb.0: 922; X86-AVX2-NEXT: vpmullw {{\.LCPI.*}}, %ymm0, %ymm0 923; X86-AVX2-NEXT: retl 924 %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 925 ret <16 x i16> %shift 926} 927 928define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { 929; AVX1-LABEL: constant_shift_v32i8: 930; AVX1: # %bb.0: 931; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 932; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 933; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,64,32,16,8,4,2,1] 934; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 935; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 936; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 937; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 938; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128] 939; AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1 940; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 941; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 942; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 943; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 944; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 945; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 946; AVX1-NEXT: vpmullw %xmm5, %xmm0, %xmm0 947; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 948; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 949; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 950; AVX1-NEXT: retq 951; 952; AVX2-LABEL: constant_shift_v32i8: 953; AVX2: # %bb.0: 954; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1 955; AVX2-NEXT: 
vpand {{.*}}(%rip), %ymm1, %ymm1 956; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] 957; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 958; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 959; AVX2-NEXT: vpsllw $2, %ymm0, %ymm1 960; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 961; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 962; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 963; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1 964; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 965; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 966; AVX2-NEXT: retq 967; 968; XOPAVX1-LABEL: constant_shift_v32i8: 969; XOPAVX1: # %bb.0: 970; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 971; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] 972; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1 973; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0 974; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 975; XOPAVX1-NEXT: retq 976; 977; XOPAVX2-LABEL: constant_shift_v32i8: 978; XOPAVX2: # %bb.0: 979; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 980; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] 981; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1 982; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0 983; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 984; XOPAVX2-NEXT: retq 985; 986; AVX512DQ-LABEL: constant_shift_v32i8: 987; AVX512DQ: # %bb.0: 988; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm1 989; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 990; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] 991; AVX512DQ-NEXT: # ymm2 = mem[0,1,0,1] 992; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 993; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm1 994; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 995; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 996; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 997; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm1 
998; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2 999; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1000; AVX512DQ-NEXT: retq 1001; 1002; AVX512BW-LABEL: constant_shift_v32i8: 1003; AVX512BW: # %bb.0: 1004; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1005; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 1006; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1007; AVX512BW-NEXT: retq 1008; 1009; AVX512DQVL-LABEL: constant_shift_v32i8: 1010; AVX512DQVL: # %bb.0: 1011; AVX512DQVL-NEXT: vpsllw $4, %ymm0, %ymm1 1012; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1013; AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] 1014; AVX512DQVL-NEXT: # ymm2 = mem[0,1,0,1] 1015; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1016; AVX512DQVL-NEXT: vpsllw $2, %ymm0, %ymm1 1017; AVX512DQVL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 1018; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1019; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1020; AVX512DQVL-NEXT: vpaddb %ymm0, %ymm0, %ymm1 1021; AVX512DQVL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1022; AVX512DQVL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1023; AVX512DQVL-NEXT: retq 1024; 1025; AVX512BWVL-LABEL: constant_shift_v32i8: 1026; AVX512BWVL: # %bb.0: 1027; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 1028; AVX512BWVL-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 1029; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 1030; AVX512BWVL-NEXT: retq 1031; 1032; X86-AVX1-LABEL: constant_shift_v32i8: 1033; X86-AVX1: # %bb.0: 1034; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1035; X86-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1036; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,64,32,16,8,4,2,1] 1037; X86-AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 1038; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 1039; X86-AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 1040; X86-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1041; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128] 1042; X86-AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1 1043; X86-AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 1044; X86-AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 1045; X86-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1046; X86-AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 1047; X86-AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 1048; X86-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1049; X86-AVX1-NEXT: vpmullw %xmm5, %xmm0, %xmm0 1050; X86-AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 1051; X86-AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 1052; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1053; X86-AVX1-NEXT: retl 1054; 1055; 
X86-AVX2-LABEL: constant_shift_v32i8: 1056; X86-AVX2: # %bb.0: 1057; X86-AVX2-NEXT: vpsllw $4, %ymm0, %ymm1 1058; X86-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1 1059; X86-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32] 1060; X86-AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1061; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1062; X86-AVX2-NEXT: vpsllw $2, %ymm0, %ymm1 1063; X86-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm1, %ymm1 1064; X86-AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1065; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1066; X86-AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm1 1067; X86-AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2 1068; X86-AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 1069; X86-AVX2-NEXT: retl 1070 %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0> 1071 ret <32 x i8> %shift 1072} 1073 1074; 1075; Uniform Constant Shifts 1076; 1077 1078define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind { 1079; AVX1-LABEL: splatconstant_shift_v4i64: 1080; AVX1: # %bb.0: 1081; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1 1082; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1083; AVX1-NEXT: vpsllq $7, %xmm0, %xmm0 1084; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1085; AVX1-NEXT: retq 1086; 1087; AVX2-LABEL: splatconstant_shift_v4i64: 1088; AVX2: # %bb.0: 1089; AVX2-NEXT: vpsllq $7, %ymm0, %ymm0 1090; AVX2-NEXT: retq 1091; 1092; XOPAVX1-LABEL: splatconstant_shift_v4i64: 1093; XOPAVX1: # %bb.0: 1094; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm1 1095; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1096; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm0 1097; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1098; XOPAVX1-NEXT: retq 1099; 1100; XOPAVX2-LABEL: splatconstant_shift_v4i64: 1101; XOPAVX2: # %bb.0: 1102; XOPAVX2-NEXT: vpsllq $7, %ymm0, %ymm0 
1103; XOPAVX2-NEXT: retq 1104; 1105; AVX512-LABEL: splatconstant_shift_v4i64: 1106; AVX512: # %bb.0: 1107; AVX512-NEXT: vpsllq $7, %ymm0, %ymm0 1108; AVX512-NEXT: retq 1109; 1110; AVX512VL-LABEL: splatconstant_shift_v4i64: 1111; AVX512VL: # %bb.0: 1112; AVX512VL-NEXT: vpsllq $7, %ymm0, %ymm0 1113; AVX512VL-NEXT: retq 1114; 1115; X86-AVX1-LABEL: splatconstant_shift_v4i64: 1116; X86-AVX1: # %bb.0: 1117; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm1 1118; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1119; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm0 1120; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1121; X86-AVX1-NEXT: retl 1122; 1123; X86-AVX2-LABEL: splatconstant_shift_v4i64: 1124; X86-AVX2: # %bb.0: 1125; X86-AVX2-NEXT: vpsllq $7, %ymm0, %ymm0 1126; X86-AVX2-NEXT: retl 1127 %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7> 1128 ret <4 x i64> %shift 1129} 1130 1131define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind { 1132; AVX1-LABEL: splatconstant_shift_v8i32: 1133; AVX1: # %bb.0: 1134; AVX1-NEXT: vpslld $5, %xmm0, %xmm1 1135; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1136; AVX1-NEXT: vpslld $5, %xmm0, %xmm0 1137; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1138; AVX1-NEXT: retq 1139; 1140; AVX2-LABEL: splatconstant_shift_v8i32: 1141; AVX2: # %bb.0: 1142; AVX2-NEXT: vpslld $5, %ymm0, %ymm0 1143; AVX2-NEXT: retq 1144; 1145; XOPAVX1-LABEL: splatconstant_shift_v8i32: 1146; XOPAVX1: # %bb.0: 1147; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm1 1148; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1149; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm0 1150; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1151; XOPAVX1-NEXT: retq 1152; 1153; XOPAVX2-LABEL: splatconstant_shift_v8i32: 1154; XOPAVX2: # %bb.0: 1155; XOPAVX2-NEXT: vpslld $5, %ymm0, %ymm0 1156; XOPAVX2-NEXT: retq 1157; 1158; AVX512-LABEL: splatconstant_shift_v8i32: 1159; AVX512: # %bb.0: 1160; AVX512-NEXT: vpslld $5, %ymm0, %ymm0 1161; AVX512-NEXT: retq 1162; 1163; AVX512VL-LABEL: splatconstant_shift_v8i32: 1164; 
AVX512VL: # %bb.0: 1165; AVX512VL-NEXT: vpslld $5, %ymm0, %ymm0 1166; AVX512VL-NEXT: retq 1167; 1168; X86-AVX1-LABEL: splatconstant_shift_v8i32: 1169; X86-AVX1: # %bb.0: 1170; X86-AVX1-NEXT: vpslld $5, %xmm0, %xmm1 1171; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1172; X86-AVX1-NEXT: vpslld $5, %xmm0, %xmm0 1173; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1174; X86-AVX1-NEXT: retl 1175; 1176; X86-AVX2-LABEL: splatconstant_shift_v8i32: 1177; X86-AVX2: # %bb.0: 1178; X86-AVX2-NEXT: vpslld $5, %ymm0, %ymm0 1179; X86-AVX2-NEXT: retl 1180 %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 1181 ret <8 x i32> %shift 1182} 1183 1184define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind { 1185; AVX1-LABEL: splatconstant_shift_v16i16: 1186; AVX1: # %bb.0: 1187; AVX1-NEXT: vpsllw $3, %xmm0, %xmm1 1188; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1189; AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1190; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1191; AVX1-NEXT: retq 1192; 1193; AVX2-LABEL: splatconstant_shift_v16i16: 1194; AVX2: # %bb.0: 1195; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1196; AVX2-NEXT: retq 1197; 1198; XOPAVX1-LABEL: splatconstant_shift_v16i16: 1199; XOPAVX1: # %bb.0: 1200; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm1 1201; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1202; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1203; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1204; XOPAVX1-NEXT: retq 1205; 1206; XOPAVX2-LABEL: splatconstant_shift_v16i16: 1207; XOPAVX2: # %bb.0: 1208; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1209; XOPAVX2-NEXT: retq 1210; 1211; AVX512-LABEL: splatconstant_shift_v16i16: 1212; AVX512: # %bb.0: 1213; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0 1214; AVX512-NEXT: retq 1215; 1216; AVX512VL-LABEL: splatconstant_shift_v16i16: 1217; AVX512VL: # %bb.0: 1218; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0 1219; AVX512VL-NEXT: retq 1220; 1221; X86-AVX1-LABEL: splatconstant_shift_v16i16: 1222; X86-AVX1: # %bb.0: 1223; X86-AVX1-NEXT: 
vpsllw $3, %xmm0, %xmm1 1224; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1225; X86-AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1226; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1227; X86-AVX1-NEXT: retl 1228; 1229; X86-AVX2-LABEL: splatconstant_shift_v16i16: 1230; X86-AVX2: # %bb.0: 1231; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1232; X86-AVX2-NEXT: retl 1233 %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 1234 ret <16 x i16> %shift 1235} 1236 1237define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind { 1238; AVX1-LABEL: splatconstant_shift_v32i8: 1239; AVX1: # %bb.0: 1240; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1241; AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 1242; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248] 1243; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 1244; AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1245; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1246; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1247; AVX1-NEXT: retq 1248; 1249; AVX2-LABEL: splatconstant_shift_v32i8: 1250; AVX2: # %bb.0: 1251; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1252; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1253; AVX2-NEXT: retq 1254; 1255; XOPAVX1-LABEL: splatconstant_shift_v32i8: 1256; XOPAVX1: # %bb.0: 1257; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1258; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] 1259; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1 1260; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0 1261; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1262; XOPAVX1-NEXT: retq 1263; 1264; XOPAVX2-LABEL: splatconstant_shift_v32i8: 1265; XOPAVX2: # %bb.0: 1266; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1267; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1268; XOPAVX2-NEXT: retq 1269; 1270; AVX512-LABEL: splatconstant_shift_v32i8: 1271; AVX512: # %bb.0: 1272; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0 1273; AVX512-NEXT: vpand {{.*}}(%rip), 
%ymm0, %ymm0 1274; AVX512-NEXT: retq 1275; 1276; AVX512VL-LABEL: splatconstant_shift_v32i8: 1277; AVX512VL: # %bb.0: 1278; AVX512VL-NEXT: vpsllw $3, %ymm0, %ymm0 1279; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1280; AVX512VL-NEXT: retq 1281; 1282; X86-AVX1-LABEL: splatconstant_shift_v32i8: 1283; X86-AVX1: # %bb.0: 1284; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1285; X86-AVX1-NEXT: vpsllw $3, %xmm1, %xmm1 1286; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248] 1287; X86-AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 1288; X86-AVX1-NEXT: vpsllw $3, %xmm0, %xmm0 1289; X86-AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1290; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1291; X86-AVX1-NEXT: retl 1292; 1293; X86-AVX2-LABEL: splatconstant_shift_v32i8: 1294; X86-AVX2: # %bb.0: 1295; X86-AVX2-NEXT: vpsllw $3, %ymm0, %ymm0 1296; X86-AVX2-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0 1297; X86-AVX2-NEXT: retl 1298 %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 1299 ret <32 x i8> %shift 1300} 1301