; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s

; AVX2 Logical Shift Left

define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
; CHECK-LABEL: test_sllw_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
; CHECK-LABEL: test_sllw_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
; CHECK-LABEL: test_sllw_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsllw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
; CHECK-LABEL: test_slld_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
; CHECK-LABEL: test_slld_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

define <8 x i32> @test_vpslld_var(i32 %shift) {
; CHECK-LABEL: test_vpslld_var:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vmovdqa {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
; CHECK-NEXT:    vpslld %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
  %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
  ret <8 x i32> %tmp
}

define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
; CHECK-LABEL: test_slld_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpslld $31, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
; CHECK-LABEL: test_sllq_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
; CHECK-LABEL: test_sllq_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
; CHECK-LABEL: test_sllq_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsllq $63, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}
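
; The three cases above repeat per element width: a splat shift by 0 folds
; away entirely, a splat shift by 1 is combined into an add of the value with
; itself (vpaddw/vpaddd/vpaddq), and any other uniform amount selects the
; immediate forms vpsllw/vpslld/vpsllq. In @test_vpslld_var only lane 0 of
; the amount vector is defined, which lines up with the count-in-xmm form of
; vpslld: every element is shifted by the scalar count held in the low qword
; of %xmm0.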

; AVX2 Arithmetic Shift

define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
; CHECK-LABEL: test_sraw_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %ashr = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %ashr
}

define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
; CHECK-LABEL: test_sraw_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsraw $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ashr = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %ashr
}

define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
; CHECK-LABEL: test_sraw_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsraw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ashr = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %ashr
}

define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
; CHECK-LABEL: test_srad_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %ashr = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %ashr
}

define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
; CHECK-LABEL: test_srad_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrad $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ashr = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %ashr
}

define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
; CHECK-LABEL: test_srad_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrad $31, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %ashr = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %ashr
}
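
; The same folds apply to arithmetic shifts: by-0 disappears, other uniform
; amounts select vpsraw/vpsrad $imm. AVX2 provides no packed 64-bit
; arithmetic shift (vpsraq only arrives with AVX-512), which is presumably
; why this section has no <4 x i64> cases.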

; AVX2 Logical Shift Right

define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
; CHECK-LABEL: test_srlw_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %lshr
}

define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
; CHECK-LABEL: test_srlw_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlw $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %lshr
}

define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
; CHECK-LABEL: test_srlw_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlw $15, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %lshr
}

define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
; CHECK-LABEL: test_srld_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %lshr
}

define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
; CHECK-LABEL: test_srld_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrld $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %lshr
}

define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
; CHECK-LABEL: test_srld_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrld $31, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %lshr
}

define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
; CHECK-LABEL: test_srlq_1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %lshr
}

define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
; CHECK-LABEL: test_srlq_2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlq $1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %lshr
}

define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
; CHECK-LABEL: test_srlq_3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    vpsrlq $63, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %lshr = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %lshr
}

define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: srl_trunc_and_v4i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
  %trunc = trunc <4 x i64> %and to <4 x i32>
  %lshr = lshr <4 x i32> %x, %trunc
  ret <4 x i32> %lshr
}
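
; @srl_trunc_and_v4i64 checks that the mask-and-truncate of the i64 shift
; amounts is narrowed to 32 bits: the dword halves are gathered with
; vpshufd+vpermq, masked against a broadcast constant, and fed directly to
; the per-element vpsrlvd, so no shift ever happens at i64 width.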

;
; Vectorized variable shifts
;

define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: shl_8i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %shl = shl <8 x i16> %r, %a
  ret <8 x i16> %shl
}

define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: shl_16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; CHECK-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:    vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %shl = shl <16 x i16> %r, %a
  ret <16 x i16> %shl
}

define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL: shl_32i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT:    vpsllw $4, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpsllw $2, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %shl = shl <32 x i8> %r, %a
  ret <32 x i8> %shl
}
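
; AVX2 only has per-element variable shifts for 32- and 64-bit elements
; (vpsllvd/q, vpsrlvd/q, vpsravd); the 16-bit forms arrive with AVX-512BW,
; and no byte-granularity shift exists at any ISA level. The i16 tests in
; this section therefore extend to dword lanes, shift with the vd form, and
; repack. The i8 tests use a blend ladder instead: vpsllw $5 moves the 3-bit
; shift amount's high bit into each byte's sign bit, vpblendvb conditionally
; applies shifts of 4, 2 and 1, and the mask is doubled between steps to
; expose the next amount bit. The "# kill" annotations and vzeroupper in the
; xmm-returning tests come from narrowing a ymm intermediate back to xmm:
; vzeroupper avoids AVX-to-SSE transition penalties on return.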

define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: ashr_8i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %ashr = ashr <8 x i16> %r, %a
  ret <8 x i16> %ashr
}

define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: ashr_16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; CHECK-NEXT:    vpsravd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:    vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ashr = ashr <16 x i16> %r, %a
  ret <16 x i16> %ashr
}

define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL: ashr_32i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; CHECK-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-NEXT:    vpsraw $4, %ymm3, %ymm4
; CHECK-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:    vpsraw $2, %ymm3, %ymm4
; CHECK-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:    vpsraw $1, %ymm3, %ymm4
; CHECK-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; CHECK-NEXT:    vpsrlw $8, %ymm2, %ymm2
; CHECK-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; CHECK-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-NEXT:    vpsraw $4, %ymm0, %ymm3
; CHECK-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    vpsraw $2, %ymm0, %ymm3
; CHECK-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    vpsraw $1, %ymm0, %ymm3
; CHECK-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    vpsrlw $8, %ymm0, %ymm0
; CHECK-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ashr = ashr <32 x i8> %r, %a
  ret <32 x i8> %ashr
}

define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: lshr_8i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %lshr = lshr <8 x i16> %r, %a
  ret <8 x i16> %lshr
}

define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: lshr_16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; CHECK-NEXT:    vpsrlvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:    vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %lshr = lshr <16 x i16> %r, %a
  ret <16 x i16> %lshr
}
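
; The arithmetic byte shift above first unpacks each byte into the high half
; of a word (paired with a copy of itself) so the vpsraw ladder replicates
; the correct sign bit, then repacks with vpsrlw $8 + vpackuswb. The logical
; byte shift below reuses @shl_32i8's ladder with vpsrlw, adding a vpand
; after each step to clear the bits shifted in from the neighbouring byte of
; the same word.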

define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL: lshr_32i8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT:    vpsrlw $4, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpsrlw $2, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    vpsrlw $1, %ymm0, %ymm2
; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %lshr = lshr <32 x i8> %r, %a
  ret <32 x i8> %lshr
}
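
; NOTE: The CHECK lines in this file are meant to be regenerated rather than
; hand-edited; rerun utils/update_llc_test_checks.py over this test after
; codegen changes (the exact invocation varies between LLVM revisions).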