; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; Splat patterns below

define <4 x i32> @shl4(<4 x i32> %A) nounwind {
; CHECK-LABEL: shl4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    pslld $2, %xmm1
; CHECK-NEXT:    paddd %xmm0, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

define <4 x i32> @shr4(<4 x i32> %A) nounwind {
; CHECK-LABEL: shr4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrld $2, %xmm1
; CHECK-NEXT:    psrld $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

define <4 x i32> @sra4(<4 x i32> %A) nounwind {
; CHECK-LABEL: sra4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrad $2, %xmm1
; CHECK-NEXT:    psrad $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

define <2 x i64> @shl2(<2 x i64> %A) nounwind {
; CHECK-LABEL: shl2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psllq $2, %xmm1
; CHECK-NEXT:    psllq $9, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = shl <2 x i64> %A, < i64 2, i64 2>
  %C = shl <2 x i64> %A, < i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

define <2 x i64> @shr2(<2 x i64> %A) nounwind {
; CHECK-LABEL: shr2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlq $8, %xmm1
; CHECK-NEXT:    psrlq $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = lshr <2 x i64> %A, < i64 8, i64 8>
  %C = lshr <2 x i64> %A, < i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

define <8 x i16> @shl8(<8 x i16> %A) nounwind {
; CHECK-LABEL: shl8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psllw $2, %xmm1
; CHECK-NEXT:    paddw %xmm0, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

define <8 x i16> @shr8(<8 x i16> %A) nounwind {
; CHECK-LABEL: shr8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlw $2, %xmm1
; CHECK-NEXT:    psrlw $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

define <8 x i16> @sra8(<8 x i16> %A) nounwind {
; CHECK-LABEL: sra8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psraw $2, %xmm1
; CHECK-NEXT:    psraw $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; non-splat test

define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
; X86-LABEL: sll8_nosplat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X86-NEXT:    pmullw %xmm0, %xmm1
; X86-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X86-NEXT:    pxor %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sll8_nosplat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X64-NEXT:    pmullw %xmm0, %xmm1
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; CHECK-LABEL: shr2_nosplat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    psrlq $8, %xmm2
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlq $1, %xmm1
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    xorps %xmm2, %xmm1
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = lshr <2 x i64> %A, < i64 8, i64 1>
  %C = lshr <2 x i64> %A, < i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; Other shifts

define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
; CHECK-LABEL: shl2_other:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    pslld $2, %xmm1
; CHECK-NEXT:    pslld $9, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = shl <2 x i32> %A, < i32 2, i32 2>
  %C = shl <2 x i32> %A, < i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
; CHECK-LABEL: shr2_other:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrld $8, %xmm1
; CHECK-NEXT:    psrld $1, %xmm0
; CHECK-NEXT:    pxor %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %B = lshr <2 x i32> %A, < i32 8, i32 8>
  %C = lshr <2 x i32> %A, < i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

define <16 x i8> @shl9(<16 x i8> %A) nounwind {
; X86-LABEL: shl9:
; X86:       # %bb.0:
; X86-NEXT:    psllw $3, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    psllw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}

define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; X86-LABEL: shr9:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $3, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}

define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
; CHECK-LABEL: sra_v16i8_7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    pcmpgtb %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
}

define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
; X86-LABEL: sra_v16i8:
; X86:       # %bb.0:
; X86-NEXT:    psrlw $3, %xmm0
; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X86-NEXT:    pxor %xmm1, %xmm0
; X86-NEXT:    psubb %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sra_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    retq
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}