; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512,AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2

; Just one 32-bit run to make sure we do reasonable things for i64 cases.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE2

declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

;
; Variable Shifts
;

define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pslld $23, %xmm1
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: var_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: var_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT:    pslld $23, %xmm1
; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
  ret <2 x i32> %res
}

;
; Uniform Variable Shifts
;

define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE41-NEXT:    pslld $23, %xmm1
; SSE41-NEXT:    paddd {{.*}}(%rip), %xmm1
; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VL-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLBW-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLVBMI2-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; X86-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT:    pslld $23, %xmm1
; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
  ret <2 x i32> %res
}

;
; Constant Shifts
;

define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,1,1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,1,1]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,32,1,1]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constant_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constant_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: constant_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprolvd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: constant_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,1,1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
  ret <2 x i32> %res
}

;
; Uniform Constant Shifts
;

define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: splatconstant_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $28, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pslld $4, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $28, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pslld $4, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $28, %xmm0, %xmm1
; AVX1-NEXT:    vpslld $4, %xmm0, %xmm2
; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprold $4, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprold $4, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprold $4, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprold $4, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprold $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprold $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $4, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $28, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pslld $4, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
  ret <2 x i32> %res
}