; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

; Tests AVX1 (no AVX2) lowering of 256-bit vector shifts: each 256-bit shift
; is split into two 128-bit halves via vextractf128/vinsertf128.

;;; Shift left
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; v32i8 ashr has no native instruction pre-AVX512: it is emulated per half
; with lshr + mask, then sign-corrected via xor/sub with the sign-bit constant.
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

; ashr by 7 (sign splat) lowers to a compare-greater-than against zero.
define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}

define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}

;;; Support variable shifts
define <8 x i32> @vshift08(<8 x i32> %a) {
; CHECK-LABEL: vshift08:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}

; PR15141
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}

;;; Uses shifts for sign extension
define <16 x i16> @sext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}