; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX,X64,SKX64
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL,X64,KNL64
; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX,X86,SKX32
; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL,X86,KNL32

; Expand 128 -> 256 bits, <4 x float> source.
; %a is the first shuffle operand; every other result lane reads the zero vector.
define <8 x float> @expand(<4 x float> %a) {
; SKX-LABEL: expand:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX-NEXT:    movb $5, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand:
; KNL:       # %bb.0:
; KNL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 5, i32 1, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %res
}

; Expand 128 -> 256 bits, <4 x float> source, with the zero vector as the
; first shuffle operand: the elements of %a land in the odd result lanes.
define <8 x float> @expand1(<4 x float> %a ) {
; SKX-LABEL: expand1:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX-NEXT:    movb $-86, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand1:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vmovaps {{.*#+}} ymm1 = [16,0,18,1,20,2,22,3]
; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %res
}

; Expand 128 -> 256 bits, <2 x double> -> <4 x double>.
; All four run lines share the same expected output for this case.
define <4 x double> @expand2(<2 x double> %a) {
; CHECK-LABEL: expand2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1]
; CHECK-NEXT:    vmovaps %xmm0, %xmm0
; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 2, i32 1>
  ret <4 x double> %res
}

; Expand 128 -> 256 bits, integer case <4 x i32> -> <8 x i32>.
define <8 x i32> @expand3(<4 x i32> %a ) {
; SKX-LABEL: expand3:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX-NEXT:    movb $-127, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand3:
; KNL:       # %bb.0:
; KNL-NEXT:    vbroadcastsd %xmm0, %ymm0
; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <8 x i32> <i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,i32 5>
  ret <8 x i32> %res
}

; Expand 128 -> 256 bits, integer case <2 x i64> -> <4 x i64>.
define <4 x i64> @expand4(<2 x i64> %a ) {
; SKX-LABEL: expand4:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX-NEXT:    movb $9, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand4:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL-NEXT:    vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1]
; KNL-NEXT:    vmovaps %xmm0, %xmm0
; KNL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <4 x i32> <i32 2, i32 0, i32 0, i32 3>
  ret <4 x i64> %res
}

; Negative test for 128 -> 256 bits: the mask repeats element 0 of %a
; (index 4) in every odd lane, so the expected code has no expand instruction.
define <8 x float> @expand5(<4 x float> %a ) {
; SKX-LABEL: expand5:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
; SKX-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand5:
; KNL:       # %bb.0:
; KNL-NEXT:    vbroadcastss %xmm0, %ymm0
; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
  ret <8 x float> %res
}

; 128 -> 256 bits with an identity mask (zero vector in the low half, %a in
; the high half); the expected code is a plain 128-bit insert into a zeroed register.
define <8 x float> @expand6(<4 x float> %a ) {
; CHECK-LABEL: expand6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %res
}

; Expand 256 -> 512 bits, <8 x float> -> <16 x float>.
; Both CPUs are expected to use the 512-bit expand instruction here.
define <16 x float> @expand7(<8 x float> %a) {
; SKX-LABEL: expand7:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX-NEXT:    movw $1285, %ax # imm = 0x505
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand7:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    movw $1285, %ax # imm = 0x505
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 2, i32 8, i32 3, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x float> %res
}

; Expand 256 -> 512 bits, <8 x float> source, zero vector as the first
; shuffle operand: the elements of %a land in the odd result lanes.
define <16 x float> @expand8(<8 x float> %a ) {
; SKX-LABEL: expand8:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand8:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandps %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x float> %res
}

; Expand 256 -> 512 bits, <4 x double> -> <8 x double>.
define <8 x double> @expand9(<4 x double> %a) {
; SKX-LABEL: expand9:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX-NEXT:    movb $-127, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand9:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    movb $-127, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandpd %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
  ret <8 x double> %res
}

; Expand 256 -> 512 bits, integer case <8 x i32> -> <16 x i32>.
define <16 x i32> @expand10(<8 x i32> %a ) {
; SKX-LABEL: expand10:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand10:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i32> %res
}

; Expand 256 -> 512 bits, integer case <4 x i64> -> <8 x i64>.
define <8 x i64> @expand11(<4 x i64> %a) {
; SKX-LABEL: expand11:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; SKX-NEXT:    movb $-127, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand11:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    movb $-127, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ret{{[l|q]}}
  %res = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
  ret <8 x i64> %res
}

; Negative test for 256 -> 512 bits: the mask repeats element 0 of %a
; (index 8) in every odd lane, so the expected code has no expand instruction.
define <16 x float> @expand12(<8 x float> %a) {
; CHECK-LABEL: expand12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpermt2ps %zmm0, %zmm2, %zmm1
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8,i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
  ret <16 x float> %res
}

; 256 -> 512 bits with an identity mask (zero vector in the low half, %a in
; the high half); the expected code is a plain 256-bit insert into a zeroed register.
define <16 x float> @expand13(<8 x float> %a ) {
; CHECK-LABEL: expand13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %res
}

; Checks the case where one shuffle operand is a constant vector holding both
; zero and non-zero elements, and the mask selects only zero elements from it
; (here elements 0 and 3 of %addV).

define <8 x float> @expand14(<4 x float> %a) {
; SKX-LABEL: expand14:
; SKX:       # %bb.0:
; SKX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; SKX-NEXT:    movb $20, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    ret{{[l|q]}}
;
; KNL-LABEL: expand14:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vmovaps {{.*#+}} ymm1 = [16,17,0,19,1,21,22,23]
; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    ret{{[l|q]}}
  %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
  %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 3, i32 3, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
  ret <8 x float> %res
}

; Negative test.
283define <8 x float> @expand15(<4 x float> %a) { 284; SKX-LABEL: expand15: 285; SKX: # %bb.0: 286; SKX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 287; SKX-NEXT: vmovaps {{.*#+}} ymm1 = <u,u,0,u,1,u,u,u> 288; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 289; SKX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3],ymm0[4],mem[5,6,7] 290; SKX-NEXT: ret{{[l|q]}} 291; 292; KNL-LABEL: expand15: 293; KNL: # %bb.0: 294; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3] 295; KNL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3] 296; KNL-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1],ymm0[2],mem[3],ymm0[4],mem[5,6,7] 297; KNL-NEXT: ret{{[l|q]}} 298 %addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0> 299 %res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0> 300 ret <8 x float> %res 301} 302 303 304; Shuffle to blend test 305 306define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){ 307; SKX64-LABEL: test_mm512_mask_blend_epi8: 308; SKX64: # %bb.0: # %entry 309; SKX64-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA 310; SKX64-NEXT: kmovq %rax, %k1 311; SKX64-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} 312; SKX64-NEXT: retq 313; 314; KNL-LABEL: test_mm512_mask_blend_epi8: 315; KNL: # %bb.0: # %entry 316; KNL-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 317; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 318; KNL-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 319; KNL-NEXT: ret{{[l|q]}} 320; 321; SKX32-LABEL: test_mm512_mask_blend_epi8: 322; SKX32: # %bb.0: # %entry 323; SKX32-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA 324; SKX32-NEXT: kmovd %eax, %k0 325; SKX32-NEXT: kunpckdq %k0, %k0, %k1 326; SKX32-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} 327; SKX32-NEXT: retl 328entry: 329 %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> <i32 64, i32 1, i32 66, i32 3, i32 
68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63> 330 ret <64 x i8> %0 331} 332 333define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){ 334; SKX-LABEL: test_mm512_mask_blend_epi16: 335; SKX: # %bb.0: # %entry 336; SKX-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA 337; SKX-NEXT: kmovd %eax, %k1 338; SKX-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} 339; SKX-NEXT: ret{{[l|q]}} 340; 341; KNL64-LABEL: test_mm512_mask_blend_epi16: 342; KNL64: # %bb.0: # %entry 343; KNL64-NEXT: vpternlogd $216, {{.*}}(%rip){1to16}, %zmm1, %zmm0 344; KNL64-NEXT: retq 345; 346; KNL32-LABEL: test_mm512_mask_blend_epi16: 347; KNL32: # %bb.0: # %entry 348; KNL32-NEXT: vpternlogd $216, {{\.LCPI.*}}{1to16}, %zmm1, %zmm0 349; KNL32-NEXT: retl 350entry: 351 %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31> 352 ret <32 x i16> %0 353} 354 355define <16 x i32> @test_mm512_mask_blend_epi32(<16 x i32> %A, <16 x i32> %W){ 356; SKX-LABEL: test_mm512_mask_blend_epi32: 357; SKX: # %bb.0: # %entry 358; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA 359; SKX-NEXT: kmovd %eax, %k1 360; SKX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} 361; SKX-NEXT: ret{{[l|q]}} 362; 363; KNL-LABEL: test_mm512_mask_blend_epi32: 364; KNL: # %bb.0: # %entry 365; KNL-NEXT: movw $-21846, %ax # imm = 0xAAAA 366; KNL-NEXT: kmovw 
%eax, %k1 367; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} 368; KNL-NEXT: ret{{[l|q]}} 369entry: 370 %0 = shufflevector <16 x i32> %A, <16 x i32> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 371 ret <16 x i32> %0 372} 373 374define <8 x i64> @test_mm512_mask_blend_epi64(<8 x i64> %A, <8 x i64> %W){ 375; SKX-LABEL: test_mm512_mask_blend_epi64: 376; SKX: # %bb.0: # %entry 377; SKX-NEXT: movb $-86, %al 378; SKX-NEXT: kmovd %eax, %k1 379; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} 380; SKX-NEXT: ret{{[l|q]}} 381; 382; KNL-LABEL: test_mm512_mask_blend_epi64: 383; KNL: # %bb.0: # %entry 384; KNL-NEXT: movb $-86, %al 385; KNL-NEXT: kmovw %eax, %k1 386; KNL-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} 387; KNL-NEXT: ret{{[l|q]}} 388entry: 389 %0 = shufflevector <8 x i64> %A, <8 x i64> %W, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 390 ret <8 x i64> %0 391} 392 393define <16 x float> @test_mm512_mask_blend_ps(<16 x float> %A, <16 x float> %W){ 394; SKX-LABEL: test_mm512_mask_blend_ps: 395; SKX: # %bb.0: # %entry 396; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA 397; SKX-NEXT: kmovd %eax, %k1 398; SKX-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} 399; SKX-NEXT: ret{{[l|q]}} 400; 401; KNL-LABEL: test_mm512_mask_blend_ps: 402; KNL: # %bb.0: # %entry 403; KNL-NEXT: movw $-21846, %ax # imm = 0xAAAA 404; KNL-NEXT: kmovw %eax, %k1 405; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} 406; KNL-NEXT: ret{{[l|q]}} 407entry: 408 %0 = shufflevector <16 x float> %A, <16 x float> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 409 ret <16 x float> %0 410} 411 412define <8 x double> @test_mm512_mask_blend_pd(<8 x double> %A, <8 x double> %W){ 413; SKX-LABEL: test_mm512_mask_blend_pd: 414; SKX: # %bb.0: # %entry 415; SKX-NEXT: movb $-88, %al 416; SKX-NEXT: kmovd %eax, %k1 417; SKX-NEXT: 
vblendmpd %zmm0, %zmm1, %zmm0 {%k1} 418; SKX-NEXT: ret{{[l|q]}} 419; 420; KNL-LABEL: test_mm512_mask_blend_pd: 421; KNL: # %bb.0: # %entry 422; KNL-NEXT: movb $-88, %al 423; KNL-NEXT: kmovw %eax, %k1 424; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} 425; KNL-NEXT: ret{{[l|q]}} 426entry: 427 %0 = shufflevector <8 x double> %A, <8 x double> %W, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 428 ret <8 x double> %0 429} 430 431 432define <32 x i8> @test_mm256_mask_blend_epi8(<32 x i8> %A, <32 x i8> %W){ 433; SKX-LABEL: test_mm256_mask_blend_epi8: 434; SKX: # %bb.0: # %entry 435; SKX-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA 436; SKX-NEXT: kmovd %eax, %k1 437; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} 438; SKX-NEXT: ret{{[l|q]}} 439; 440; KNL-LABEL: test_mm256_mask_blend_epi8: 441; KNL: # %bb.0: # %entry 442; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 443; KNL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 444; KNL-NEXT: ret{{[l|q]}} 445entry: 446 %0 = shufflevector <32 x i8> %A, <32 x i8> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31> 447 ret <32 x i8> %0 448} 449 450define <16 x i8> @test_mm_mask_blend_epi8(<16 x i8> %A, <16 x i8> %W){ 451; SKX-LABEL: test_mm_mask_blend_epi8: 452; SKX: # %bb.0: # %entry 453; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA 454; SKX-NEXT: kmovd %eax, %k1 455; SKX-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} 456; SKX-NEXT: ret{{[l|q]}} 457; 458; KNL-LABEL: test_mm_mask_blend_epi8: 459; KNL: # %bb.0: # %entry 460; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 461; KNL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 462; KNL-NEXT: ret{{[l|q]}} 463entry: 464 %0 = shufflevector <16 
x i8> %A, <16 x i8> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 465 ret <16 x i8> %0 466} 467 468; PR34370 469define <8 x float> @test_masked_permps_v8f32(<8 x float>* %vp, <8 x float> %vec2) { 470; SKX64-LABEL: test_masked_permps_v8f32: 471; SKX64: # %bb.0: 472; SKX64-NEXT: vmovaps (%rdi), %ymm2 473; SKX64-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15] 474; SKX64-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1 475; SKX64-NEXT: vmovaps %ymm1, %ymm0 476; SKX64-NEXT: retq 477; 478; KNL64-LABEL: test_masked_permps_v8f32: 479; KNL64: # %bb.0: 480; KNL64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 481; KNL64-NEXT: vmovaps (%rdi), %ymm1 482; KNL64-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] 483; KNL64-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 484; KNL64-NEXT: vmovaps %ymm1, %ymm0 485; KNL64-NEXT: retq 486; 487; SKX32-LABEL: test_masked_permps_v8f32: 488; SKX32: # %bb.0: 489; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax 490; SKX32-NEXT: vmovaps (%eax), %ymm2 491; SKX32-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15] 492; SKX32-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1 493; SKX32-NEXT: vmovaps %ymm1, %ymm0 494; SKX32-NEXT: retl 495; 496; KNL32-LABEL: test_masked_permps_v8f32: 497; KNL32: # %bb.0: 498; KNL32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 499; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax 500; KNL32-NEXT: vmovaps (%eax), %ymm1 501; KNL32-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] 502; KNL32-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 503; KNL32-NEXT: vmovaps %ymm1, %ymm0 504; KNL32-NEXT: retl 505 %vec = load <8 x float>, <8 x float>* %vp 506 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0> 507 %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %vec2 508 ret <8 x float> %res 509} 510 511define <16 x float> @test_masked_permps_v16f32(<16 x float>* %vp, <16 
x float> %vec2) { 512; X64-LABEL: test_masked_permps_v16f32: 513; X64: # %bb.0: 514; X64-NEXT: vmovaps (%rdi), %zmm2 515; X64-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] 516; X64-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 517; X64-NEXT: vmovaps %zmm1, %zmm0 518; X64-NEXT: retq 519; 520; X86-LABEL: test_masked_permps_v16f32: 521; X86: # %bb.0: 522; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 523; X86-NEXT: vmovaps (%eax), %zmm2 524; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] 525; X86-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 526; X86-NEXT: vmovaps %zmm1, %zmm0 527; X86-NEXT: retl 528 %vec = load <16 x float>, <16 x float>* %vp 529 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 13, i32 11, i32 9, i32 14, i32 12, i32 10, i32 8, i32 7, i32 6, i32 3, i32 0, i32 7, i32 6, i32 3, i32 0> 530 %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %vec2 531 ret <16 x float> %res 532} 533 534define void @test_demandedelts_pshufb_v32i8_v16i8(<2 x i32>* %src, <8 x i32>* %dst) { 535; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8: 536; SKX64: # %bb.0: 537; SKX64-NEXT: vmovdqa 32(%rdi), %xmm0 538; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero 539; SKX64-NEXT: vmovdqa %ymm0, 672(%rsi) 540; SKX64-NEXT: vmovdqa 208(%rdi), %xmm0 541; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero 542; SKX64-NEXT: vmovdqa %ymm0, 832(%rsi) 543; SKX64-NEXT: vzeroupper 544; SKX64-NEXT: retq 545; 546; KNL64-LABEL: test_demandedelts_pshufb_v32i8_v16i8: 547; KNL64: # %bb.0: 548; KNL64-NEXT: vmovdqa 32(%rdi), %xmm0 549; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero 550; KNL64-NEXT: vmovdqa %ymm0, 672(%rsi) 551; KNL64-NEXT: vmovdqa 208(%rdi), %xmm0 552; 
KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero 553; KNL64-NEXT: vmovdqa %ymm0, 832(%rsi) 554; KNL64-NEXT: retq 555; 556; SKX32-LABEL: test_demandedelts_pshufb_v32i8_v16i8: 557; SKX32: # %bb.0: 558; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax 559; SKX32-NEXT: movl {{[0-9]+}}(%esp), %ecx 560; SKX32-NEXT: vmovdqa 32(%ecx), %xmm0 561; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero 562; SKX32-NEXT: vmovdqa %ymm0, 672(%eax) 563; SKX32-NEXT: vmovdqa 208(%ecx), %xmm0 564; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero 565; SKX32-NEXT: vmovdqa %ymm0, 832(%eax) 566; SKX32-NEXT: vzeroupper 567; SKX32-NEXT: retl 568; 569; KNL32-LABEL: test_demandedelts_pshufb_v32i8_v16i8: 570; KNL32: # %bb.0: 571; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax 572; KNL32-NEXT: vmovdqa 32(%eax), %xmm0 573; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero 574; KNL32-NEXT: movl {{[0-9]+}}(%esp), %ecx 575; KNL32-NEXT: vmovdqa %ymm0, 672(%ecx) 576; KNL32-NEXT: vmovdqa 208(%eax), %xmm0 577; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero 578; KNL32-NEXT: vmovdqa %ymm0, 832(%ecx) 579; KNL32-NEXT: retl 580 %t64 = bitcast <2 x i32>* %src to <16 x i32>* 581 %t87 = load <16 x i32>, <16 x i32>* %t64, align 64 582 %t88 = extractelement <16 x i32> %t87, i64 11 583 %t89 = insertelement <8 x i32> <i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %t88, i64 0 584 %t90 = insertelement <8 x i32> %t89, i32 %t88, i64 1 585 %ptridx49.i = getelementptr inbounds <8 x i32>, <8 x i32>* %dst, i64 21 586 store <8 x i32> %t90, <8 x i32>* %ptridx49.i, align 32 587 %ptridx56.i = getelementptr inbounds <2 x i32>, <2 x i32>* %src, i64 24 588 %t00 = bitcast <2 x i32>* %ptridx56.i to <16 x i32>* 589 %t09 = load <16 x i32>, <16 x i32>* %t00, align 64 590 
%t10 = extractelement <16 x i32> %t09, i64 5 591 %t11 = insertelement <8 x i32> <i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %t10, i64 0 592 %t12 = extractelement <16 x i32> %t09, i64 4 593 %t13 = insertelement <8 x i32> %t11, i32 %t12, i64 1 594 %ptridx64.i = getelementptr inbounds <8 x i32>, <8 x i32>* %dst, i64 26 595 store <8 x i32> %t13, <8 x i32>* %ptridx64.i, align 32 596 ret void 597} 598 599define <32 x float> @PR47534(<8 x float> %tmp) { 600; CHECK-LABEL: PR47534: 601; CHECK: # %bb.0: 602; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 603; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 604; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [7,25,26,27,7,29,30,31,7,25,26,27,7,29,30,31] 605; CHECK-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 606; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 607; CHECK-NEXT: ret{{[l|q]}} 608 %tmp1 = shufflevector <8 x float> %tmp, <8 x float> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 609 %tmp2 = shufflevector <32 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <32 x float> undef, <32 x i32> <i32 39, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 30, i32 31> 610 %tmp18 = shufflevector <32 x float> %tmp2, <32 x float> %tmp1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 39, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 30, i32 31> 611 ret <32 x float> %tmp18 612} 613 614%union1= type { <16 x float> } 615@src1 = external dso_local local_unnamed_addr global %union1, align 64 616 617define void @PR43170(<16 x float>* %a0) { 618; SKX64-LABEL: PR43170: 619; SKX64: # %bb.0: # %entry 620; SKX64-NEXT: vmovaps {{.*}}(%rip), %ymm0 621; SKX64-NEXT: vmovaps %zmm0, (%rdi) 622; SKX64-NEXT: vzeroupper 623; SKX64-NEXT: retq 624; 625; KNL64-LABEL: PR43170: 626; KNL64: # %bb.0: # %entry 627; KNL64-NEXT: vmovaps {{.*}}(%rip), %ymm0 628; KNL64-NEXT: vmovaps %zmm0, (%rdi) 629; KNL64-NEXT: retq 630; 631; SKX32-LABEL: PR43170: 632; SKX32: # %bb.0: # %entry 633; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax 634; SKX32-NEXT: vmovaps src1, %ymm0 635; SKX32-NEXT: vmovaps %zmm0, (%eax) 636; SKX32-NEXT: vzeroupper 637; SKX32-NEXT: retl 638; 639; KNL32-LABEL: PR43170: 640; KNL32: # %bb.0: # %entry 641; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax 642; KNL32-NEXT: vmovaps src1, %ymm0 643; KNL32-NEXT: vmovaps %zmm0, (%eax) 644; KNL32-NEXT: retl 645entry: 646 %0 = load <8 x float>, <8 x float>* bitcast (%union1* @src1 to <8 x float>*), align 64 647 %1 = shufflevector <8 x float> %0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 648 store <16 x float> %1, <16 x float>* %a0, align 64 649 ret void 650} 651