; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW

; Zero/sign extension from vXi8 to vXi16. The "mem" variants load the source
; vector through %i; the others take it in a register. Variants that take a
; %mask argument zero the unselected lanes through a select against
; zeroinitializer.
;
; Functions that load through %i are marked readonly, not readnone: LangRef
; makes it undefined behaviour for a readnone function to read memory.

; Masked zext of <8 x i8> loaded from memory to <8 x i16>.
define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_8x8mem_to_8x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x8mem_to_8x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512DQNOBW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}

; Masked sext of <8 x i8> loaded from memory to <8 x i16>.
define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_8x8mem_to_8x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxbw (%rdi), %xmm1
; KNL-NEXT:    vpsllw $15, %xmm0, %xmm0
; KNL-NEXT:    vpsraw $15, %xmm0, %xmm0
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x8mem_to_8x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %xmm1
; AVX512DQNOBW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i16>
  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %ret
}

; Masked zext of <16 x i8> loaded from memory to <16 x i16>.
define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_16x8mem_to_16x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8mem_to_16x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Masked sext of <16 x i8> loaded from memory to <16 x i16>.
define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_16x8mem_to_16x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovsxbw (%rdi), %ymm1
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8mem_to_16x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %ymm1
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Unmasked zext of a <16 x i8> register argument to <16 x i16>.
define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; ALL-LABEL: zext_16x8_to_16x16:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; ALL-NEXT:    retq
  %x = zext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}

; Masked zext of a <16 x i8> register argument to <16 x i16>.
define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8_to_16x16_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8_to_16x16_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512DQNOBW-NEXT:    retq
  %x = zext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Unmasked sext of a <16 x i8> register argument to <16 x i16>.
define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
; ALL-LABEL: sext_16x8_to_16x16:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovsxbw %xmm0, %ymm0
; ALL-NEXT:    retq
  %x = sext <16 x i8> %a to <16 x i16>
  ret <16 x i16> %x
}

; Masked sext of a <16 x i8> register argument to <16 x i16>.
define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8_to_16x16_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8_to_16x16_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm1
; SKX-NEXT:    vpmovb2m %xmm1, %k1
; SKX-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512DQNOBW-NEXT:    retq
  %x = sext <16 x i8> %a to <16 x i16>
  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %ret
}

; Masked zext of <32 x i8> loaded from memory to <32 x i16>.
define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_32x8mem_to_32x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32x8mem_to_32x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Masked sext of <32 x i8> loaded from memory to <32 x i16>.
define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_32x8mem_to_32x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vpmovsxbw 16(%rdi), %ymm2
; KNL-NEXT:    vpmovsxbw (%rdi), %ymm3
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_32x8mem_to_32x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vpmovsxbw 16(%rdi), %ymm2
; AVX512DQNOBW-NEXT:    vpmovsxbw (%rdi), %ymm3
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %a = load <32 x i8>,<32 x i8> *%i,align 1
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Unmasked zext of a <32 x i8> register argument to <32 x i16>.
define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-LABEL: zext_32x8_to_32x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32x8_to_32x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %x = zext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}

; Masked zext of a <32 x i8> register argument to <32 x i16>.
define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_32x8_to_32x16_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32x8_to_32x16_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512DQNOBW-NEXT:    vpandq %zmm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %x = zext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Unmasked sext of a <32 x i8> register argument to <32 x i16>.
define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
; KNL-LABEL: sext_32x8_to_32x16:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm1
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_32x8_to_32x16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm1
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %x = sext <32 x i8> %a to <32 x i16>
  ret <32 x i16> %x
}

; Masked sext of a <32 x i8> register argument to <32 x i16>.
define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_32x8_to_32x16_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm3
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovsxbw %xmm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_32x8_to_32x16_mask:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQNOBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm3
; AVX512DQNOBW-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovsxbw %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQNOBW-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512DQNOBW-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512DQNOBW-NEXT:    vpandq %zmm0, %zmm1, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %x = sext <32 x i8> %a to <32 x i16>
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}
; Zero/sign extension from vXi8 loaded through %i to vXi32, with unselected
; lanes zeroed through a select against zeroinitializer.
;
; These functions are marked readonly, not readnone, because they load
; through %i: LangRef makes it undefined behaviour for a readnone function to
; read memory.

; Masked zext of <4 x i8> loaded from memory to <4 x i32>.
define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_4x8mem_to_4x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX512DQ-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = zext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Masked sext of <4 x i8> loaded from memory to <4 x i32>.
define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_4x8mem_to_4x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbd (%rdi), %xmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %xmm0, %k1
; AVX512DQ-NEXT:    vpmovsxbd (%rdi), %xmm0 {%k1} {z}
; AVX512DQ-NEXT:    retq
  %a = load <4 x i8>,<4 x i8> *%i,align 1
  %x = sext <4 x i8> %a to <4 x i32>
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Masked zext of <8 x i8> loaded from memory to <8 x i32>.
define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_8x8mem_to_8x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_8x8mem_to_8x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = zext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

; Masked sext of <8 x i8> loaded from memory to <8 x i32>.
define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_8x8mem_to_8x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_8x8mem_to_8x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512DQNOBW-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovd2m %ymm0, %k1
; AVX512DQNOBW-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z}
; AVX512DQNOBW-NEXT:    retq
  %a = load <8 x i8>,<8 x i8> *%i,align 1
  %x = sext <8 x i8> %a to <8 x i32>
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %ret
}

; Masked zext of <16 x i8> loaded from memory to <16 x i32>.
define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
; KNL-LABEL: zext_16x8mem_to_16x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_16x8mem_to_16x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
; AVX512DQNOBW-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = zext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}

; Masked sext of <16 x i8> loaded from memory to <16 x i32>.
define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readonly {
; KNL-LABEL: sext_16x8mem_to_16x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: sext_16x8mem_to_16x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k1
; SKX-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpmovd2m %zmm0, %k1
; AVX512DQNOBW-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z}
; AVX512DQNOBW-NEXT:    retq
  %a = load <16 x i8>,<16 x i8> *%i,align 1
  %x = sext <16 x i8> %a to <16 x i32>
  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %ret
}
; NOTE(review): the definition that starts on the next line continues past the end of the reviewed text; it is left exactly as found.
602 603define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 604; 
KNL-LABEL: zext_16x8_to_16x32_mask: 605; KNL: # %bb.0: 606; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 607; KNL-NEXT: vpslld $31, %zmm1, %zmm1 608; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 609; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 610; KNL-NEXT: retq 611; 612; SKX-LABEL: zext_16x8_to_16x32_mask: 613; SKX: # %bb.0: 614; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 615; SKX-NEXT: vpmovb2m %xmm1, %k1 616; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 617; SKX-NEXT: retq 618; 619; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask: 620; AVX512DQNOBW: # %bb.0: 621; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 622; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 623; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 624; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 625; AVX512DQNOBW-NEXT: retq 626 %x = zext <16 x i8> %a to <16 x i32> 627 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> 
zeroinitializer 628 ret <16 x i32> %ret 629} 630 631define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 632; KNL-LABEL: sext_16x8_to_16x32_mask: 633; KNL: # %bb.0: 634; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 635; KNL-NEXT: vpslld $31, %zmm1, %zmm1 636; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 637; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 638; KNL-NEXT: retq 639; 640; SKX-LABEL: sext_16x8_to_16x32_mask: 641; SKX: # %bb.0: 642; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 643; SKX-NEXT: vpmovb2m %xmm1, %k1 644; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 645; SKX-NEXT: retq 646; 647; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask: 648; AVX512DQNOBW: # %bb.0: 649; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 650; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 651; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 652; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} 653; AVX512DQNOBW-NEXT: retq 654 %x = sext <16 x i8> %a to <16 x i32> 655 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 656 ret <16 x i32> %ret 657} 658 659define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 660; ALL-LABEL: zext_16x8_to_16x32: 661; ALL: # %bb.0: 662; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 663; ALL-NEXT: retq 664 %x = zext <16 x i8> %i to <16 x i32> 665 ret <16 x i32> %x 666} 667 668define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 669; ALL-LABEL: sext_16x8_to_16x32: 670; ALL: # %bb.0: 671; ALL-NEXT: vpmovsxbd %xmm0, %zmm0 672; ALL-NEXT: retq 673 %x = sext <16 x i8> %i to <16 x i32> 674 ret <16 x i32> %x 675} 676 677define <2 x i64> 
@zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 678; KNL-LABEL: zext_2x8mem_to_2x64: 679; KNL: # %bb.0: 680; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 681; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 682; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 683; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 684; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 685; KNL-NEXT: vzeroupper 686; KNL-NEXT: retq 687; 688; AVX512DQ-LABEL: zext_2x8mem_to_2x64: 689; AVX512DQ: # %bb.0: 690; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 691; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 692; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 693; AVX512DQ-NEXT: retq 694 %a = load <2 x i8>,<2 x i8> *%i,align 1 695 %x = zext <2 x i8> %a to <2 x i64> 696 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 697 ret <2 x i64> %ret 698} 699define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 700; KNL-LABEL: sext_2x8mem_to_2x64mask: 701; KNL: # %bb.0: 702; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 703; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 704; KNL-NEXT: vpmovsxbq (%rdi), %xmm0 705; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 706; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 707; KNL-NEXT: vzeroupper 708; KNL-NEXT: retq 709; 710; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask: 711; AVX512DQ: # %bb.0: 712; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 713; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 714; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} 715; AVX512DQ-NEXT: retq 716 %a = load <2 x i8>,<2 x i8> *%i,align 1 717 %x = sext <2 x i8> %a to <2 x i64> 718 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 719 ret <2 x i64> %ret 720} 721define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { 722; ALL-LABEL: sext_2x8mem_to_2x64: 723; ALL: # %bb.0: 724; ALL-NEXT: 
vpmovsxbq (%rdi), %xmm0 725; ALL-NEXT: retq 726 %a = load <2 x i8>,<2 x i8> *%i,align 1 727 %x = sext <2 x i8> %a to <2 x i64> 728 ret <2 x i64> %x 729} 730 731define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 732; KNL-LABEL: zext_4x8mem_to_4x64: 733; KNL: # %bb.0: 734; KNL-NEXT: vpslld $31, %xmm0, %xmm0 735; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 736; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 737; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 738; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 739; KNL-NEXT: retq 740; 741; AVX512DQ-LABEL: zext_4x8mem_to_4x64: 742; AVX512DQ: # %bb.0: 743; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 744; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 745; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 746; AVX512DQ-NEXT: retq 747 %a = load <4 x i8>,<4 x i8> *%i,align 1 748 %x = zext <4 x i8> %a to <4 x i64> 749 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 750 ret <4 x i64> %ret 751} 752 753define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 754; KNL-LABEL: sext_4x8mem_to_4x64mask: 755; KNL: # %bb.0: 756; KNL-NEXT: vpslld $31, %xmm0, %xmm0 757; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 758; KNL-NEXT: vpmovsxbq (%rdi), %ymm0 759; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 760; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 761; KNL-NEXT: retq 762; 763; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask: 764; AVX512DQ: # %bb.0: 765; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 766; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 767; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} 768; AVX512DQ-NEXT: retq 769 %a = load <4 x i8>,<4 x i8> 
*%i,align 1 770 %x = sext <4 x i8> %a to <4 x i64> 771 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 772 ret <4 x i64> %ret 773} 774 775define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { 776; ALL-LABEL: sext_4x8mem_to_4x64: 777; ALL: # %bb.0: 778; ALL-NEXT: vpmovsxbq (%rdi), %ymm0 779; ALL-NEXT: retq 780 %a = load <4 x i8>,<4 x i8> *%i,align 1 781 %x = sext <4 x i8> %a to <4 x i64> 782 ret <4 x i64> %x 783} 784 785define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 786; KNL-LABEL: zext_8x8mem_to_8x64: 787; KNL: # %bb.0: 788; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 789; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 790; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 791; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 792; KNL-NEXT: retq 793; 794; SKX-LABEL: zext_8x8mem_to_8x64: 795; SKX: # %bb.0: 796; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 797; SKX-NEXT: vpmovw2m %xmm0, %k1 798; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 799; SKX-NEXT: retq 800; 801; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64: 802; AVX512DQNOBW: # %bb.0: 803; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 804; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 805; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 806; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 807; AVX512DQNOBW-NEXT: retq 808 %a = load <8 x i8>,<8 x i8> *%i,align 1 809 %x = zext <8 x i8> %a to <8 x i64> 810 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 811 ret <8 x i64> %ret 812} 813 814define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 815; KNL-LABEL: sext_8x8mem_to_8x64mask: 816; KNL: # %bb.0: 817; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 818; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 819; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 820; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} 821; KNL-NEXT: retq 822; 823; SKX-LABEL: sext_8x8mem_to_8x64mask: 824; SKX: # %bb.0: 825; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 826; SKX-NEXT: vpmovw2m %xmm0, %k1 827; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} 828; SKX-NEXT: retq 829; 830; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask: 831; AVX512DQNOBW: # %bb.0: 832; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 833; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 834; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 835; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} 836; AVX512DQNOBW-NEXT: retq 837 %a = load <8 x i8>,<8 x i8> *%i,align 1 838 %x = sext <8 x i8> %a to <8 x i64> 839 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 840 ret <8 x i64> %ret 841} 842 843define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { 844; ALL-LABEL: sext_8x8mem_to_8x64: 845; ALL: # %bb.0: 846; ALL-NEXT: vpmovsxbq (%rdi), %zmm0 847; ALL-NEXT: retq 848 %a = load <8 x i8>,<8 x i8> *%i,align 1 849 %x = sext <8 x i8> %a to <8 x i64> 850 ret <8 x i64> %x 851} 852 853define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind 
readnone { 854; KNL-LABEL: zext_4x16mem_to_4x32: 855; KNL: # %bb.0: 856; KNL-NEXT: vpslld $31, %xmm0, %xmm0 857; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 858; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 859; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 860; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 861; KNL-NEXT: vzeroupper 862; KNL-NEXT: retq 863; 864; AVX512DQ-LABEL: zext_4x16mem_to_4x32: 865; AVX512DQ: # %bb.0: 866; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 867; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 868; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 869; AVX512DQ-NEXT: retq 870 %a = load <4 x i16>,<4 x i16> *%i,align 1 871 %x = zext <4 x i16> %a to <4 x i32> 872 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 873 ret <4 x i32> %ret 874} 875 876define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 877; KNL-LABEL: sext_4x16mem_to_4x32mask: 878; KNL: # %bb.0: 879; KNL-NEXT: vpslld $31, %xmm0, %xmm0 880; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 881; KNL-NEXT: vpmovsxwd (%rdi), %xmm0 882; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 883; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 884; KNL-NEXT: vzeroupper 885; KNL-NEXT: retq 886; 887; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask: 888; AVX512DQ: # %bb.0: 889; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 890; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 891; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} 892; AVX512DQ-NEXT: retq 893 %a = load <4 x i16>,<4 x i16> *%i,align 1 894 %x = sext <4 x i16> %a to <4 x i32> 895 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 896 ret <4 x i32> %ret 897} 898 899define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { 900; ALL-LABEL: sext_4x16mem_to_4x32: 901; ALL: # %bb.0: 902; ALL-NEXT: vpmovsxwd (%rdi), %xmm0 903; ALL-NEXT: retq 904 %a = load <4 x i16>,<4 x i16> *%i,align 1 905 %x = sext <4 x i16> 
%a to <4 x i32> 906 ret <4 x i32> %x 907} 908 909 910define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 911; KNL-LABEL: zext_8x16mem_to_8x32: 912; KNL: # %bb.0: 913; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 914; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 915; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 916; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 917; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 918; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 919; KNL-NEXT: retq 920; 921; SKX-LABEL: zext_8x16mem_to_8x32: 922; SKX: # %bb.0: 923; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 924; SKX-NEXT: vpmovw2m %xmm0, %k1 925; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 926; SKX-NEXT: retq 927; 928; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32: 929; AVX512DQNOBW: # %bb.0: 930; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 931; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 932; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 933; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 934; AVX512DQNOBW-NEXT: retq 935 %a = load <8 x i16>,<8 x i16> *%i,align 1 936 %x = zext <8 x i16> %a to <8 x i32> 937 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 938 ret <8 x i32> %ret 939} 940 941define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 942; KNL-LABEL: sext_8x16mem_to_8x32mask: 943; KNL: # %bb.0: 944; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 945; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 946; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 947; KNL-NEXT: vpmovsxwd (%rdi), %ymm0 948; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 949; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 950; KNL-NEXT: retq 951; 952; SKX-LABEL: sext_8x16mem_to_8x32mask: 953; SKX: # %bb.0: 
954; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 955; SKX-NEXT: vpmovw2m %xmm0, %k1 956; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} 957; SKX-NEXT: retq 958; 959; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask: 960; AVX512DQNOBW: # %bb.0: 961; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 962; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 963; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 964; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} 965; AVX512DQNOBW-NEXT: retq 966 %a = load <8 x i16>,<8 x i16> *%i,align 1 967 %x = sext <8 x i16> %a to <8 x i32> 968 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 969 ret <8 x i32> %ret 970} 971 972define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { 973; ALL-LABEL: sext_8x16mem_to_8x32: 974; ALL: # %bb.0: 975; ALL-NEXT: vpmovsxwd (%rdi), %ymm0 976; ALL-NEXT: retq 977 %a = load <8 x i16>,<8 x i16> *%i,align 1 978 %x = sext <8 x i16> %a to <8 x i32> 979 ret <8 x i32> %x 980} 981 982define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 983; KNL-LABEL: zext_8x16_to_8x32mask: 984; KNL: # %bb.0: 985; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 986; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 987; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 988; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 989; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} 990; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 991; KNL-NEXT: retq 992; 993; SKX-LABEL: zext_8x16_to_8x32mask: 994; SKX: # %bb.0: 995; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 996; SKX-NEXT: vpmovw2m %xmm1, %k1 997; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 998; SKX-NEXT: retq 999; 1000; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask: 1001; AVX512DQNOBW: # %bb.0: 1002; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 1003; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 1004; 
AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 1005; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1006; AVX512DQNOBW-NEXT: retq 1007 %x = zext <8 x i16> %a to <8 x i32> 1008 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 1009 ret <8 x i32> %ret 1010} 1011 1012define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { 1013; ALL-LABEL: zext_8x16_to_8x32: 1014; ALL: # %bb.0: 1015; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1016; ALL-NEXT: retq 1017 %x = zext <8 x i16> %a to <8 x i32> 1018 ret <8 x i32> %x 1019} 1020 1021define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 1022; KNL-LABEL: zext_16x16mem_to_16x32: 1023; KNL: # %bb.0: 1024; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1025; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1026; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1027; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1028; KNL-NEXT: retq 1029; 1030; SKX-LABEL: zext_16x16mem_to_16x32: 1031; SKX: # %bb.0: 1032; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 1033; SKX-NEXT: vpmovb2m %xmm0, %k1 1034; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1035; SKX-NEXT: retq 1036; 1037; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32: 1038; AVX512DQNOBW: # %bb.0: 1039; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 1040; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 1041; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 1042; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} 
zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1043; AVX512DQNOBW-NEXT: retq 1044 %a = load <16 x i16>,<16 x i16> *%i,align 1 1045 %x = zext <16 x i16> %a to <16 x i32> 1046 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 1047 ret <16 x i32> %ret 1048} 1049 1050define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 1051; KNL-LABEL: sext_16x16mem_to_16x32mask: 1052; KNL: # %bb.0: 1053; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1054; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1055; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1056; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 1057; KNL-NEXT: retq 1058; 1059; SKX-LABEL: sext_16x16mem_to_16x32mask: 1060; SKX: # %bb.0: 1061; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 1062; SKX-NEXT: vpmovb2m %xmm0, %k1 1063; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 1064; SKX-NEXT: retq 1065; 1066; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask: 1067; AVX512DQNOBW: # %bb.0: 1068; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 1069; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 1070; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 1071; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} 1072; AVX512DQNOBW-NEXT: retq 1073 %a = load <16 x i16>,<16 x i16> *%i,align 1 1074 %x = sext <16 x i16> %a to <16 x i32> 1075 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 1076 ret <16 x i32> %ret 1077} 1078 1079define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { 1080; ALL-LABEL: sext_16x16mem_to_16x32: 1081; ALL: # %bb.0: 1082; ALL-NEXT: vpmovsxwd (%rdi), %zmm0 1083; ALL-NEXT: retq 1084 %a = load <16 x i16>,<16 x i16> *%i,align 1 1085 %x = sext <16 x i16> %a to <16 x i32> 1086 ret <16 x i32> %x 1087} 1088define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { 1089; 
KNL-LABEL: zext_16x16_to_16x32mask: 1090; KNL: # %bb.0: 1091; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1092; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1093; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 1094; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1095; KNL-NEXT: retq 1096; 1097; SKX-LABEL: zext_16x16_to_16x32mask: 1098; SKX: # %bb.0: 1099; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 1100; SKX-NEXT: vpmovb2m %xmm1, %k1 1101; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1102; SKX-NEXT: retq 1103; 1104; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask: 1105; AVX512DQNOBW: # %bb.0: 1106; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 1107; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 1108; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 1109; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1110; AVX512DQNOBW-NEXT: retq 1111 %x = zext <16 x i16> %a to <16 x i32> 1112 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 1113 ret <16 x i32> %ret 1114} 1115 1116define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { 1117; ALL-LABEL: zext_16x16_to_16x32: 1118; ALL: # %bb.0: 1119; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1120; ALL-NEXT: 
retq 1121 %x = zext <16 x i16> %a to <16 x i32> 1122 ret <16 x i32> %x 1123} 1124 1125define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 1126; KNL-LABEL: zext_2x16mem_to_2x64: 1127; KNL: # %bb.0: 1128; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1129; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1130; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1131; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1132; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1133; KNL-NEXT: vzeroupper 1134; KNL-NEXT: retq 1135; 1136; AVX512DQ-LABEL: zext_2x16mem_to_2x64: 1137; AVX512DQ: # %bb.0: 1138; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1139; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1140; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1141; AVX512DQ-NEXT: retq 1142 %a = load <2 x i16>,<2 x i16> *%i,align 1 1143 %x = zext <2 x i16> %a to <2 x i64> 1144 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1145 ret <2 x i64> %ret 1146} 1147 1148define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 1149; KNL-LABEL: sext_2x16mem_to_2x64mask: 1150; KNL: # %bb.0: 1151; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1152; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1153; KNL-NEXT: vpmovsxwq (%rdi), %xmm0 1154; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1155; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1156; KNL-NEXT: vzeroupper 1157; KNL-NEXT: retq 1158; 1159; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask: 1160; AVX512DQ: # %bb.0: 1161; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1162; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1163; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} 1164; AVX512DQ-NEXT: retq 1165 %a = load <2 x i16>,<2 x i16> *%i,align 1 1166 %x = sext <2 x i16> %a to <2 x i64> 1167 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1168 ret <2 x i64> %ret 1169} 1170 1171define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) 
nounwind readnone { 1172; ALL-LABEL: sext_2x16mem_to_2x64: 1173; ALL: # %bb.0: 1174; ALL-NEXT: vpmovsxwq (%rdi), %xmm0 1175; ALL-NEXT: retq 1176 %a = load <2 x i16>,<2 x i16> *%i,align 1 1177 %x = sext <2 x i16> %a to <2 x i64> 1178 ret <2 x i64> %x 1179} 1180 1181define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 1182; KNL-LABEL: zext_4x16mem_to_4x64: 1183; KNL: # %bb.0: 1184; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1185; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1186; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1187; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1188; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1189; KNL-NEXT: retq 1190; 1191; AVX512DQ-LABEL: zext_4x16mem_to_4x64: 1192; AVX512DQ: # %bb.0: 1193; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1194; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1195; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1196; AVX512DQ-NEXT: retq 1197 %a = load <4 x i16>,<4 x i16> *%i,align 1 1198 %x = zext <4 x i16> %a to <4 x i64> 1199 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1200 ret <4 x i64> %ret 1201} 1202 1203define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 1204; KNL-LABEL: sext_4x16mem_to_4x64mask: 1205; KNL: # %bb.0: 1206; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1207; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1208; KNL-NEXT: vpmovsxwq (%rdi), %ymm0 1209; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1210; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1211; KNL-NEXT: retq 1212; 1213; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask: 1214; AVX512DQ: # %bb.0: 1215; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1216; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1217; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} 1218; AVX512DQ-NEXT: retq 1219 %a = load <4 x i16>,<4 x i16> *%i,align 1 
1220 %x = sext <4 x i16> %a to <4 x i64> 1221 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1222 ret <4 x i64> %ret 1223} 1224 1225define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { 1226; ALL-LABEL: sext_4x16mem_to_4x64: 1227; ALL: # %bb.0: 1228; ALL-NEXT: vpmovsxwq (%rdi), %ymm0 1229; ALL-NEXT: retq 1230 %a = load <4 x i16>,<4 x i16> *%i,align 1 1231 %x = sext <4 x i16> %a to <4 x i64> 1232 ret <4 x i64> %x 1233} 1234 1235define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 1236; KNL-LABEL: zext_8x16mem_to_8x64: 1237; KNL: # %bb.0: 1238; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1239; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1240; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1241; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1242; KNL-NEXT: retq 1243; 1244; SKX-LABEL: zext_8x16mem_to_8x64: 1245; SKX: # %bb.0: 1246; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1247; SKX-NEXT: vpmovw2m %xmm0, %k1 1248; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1249; SKX-NEXT: retq 1250; 1251; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64: 1252; AVX512DQNOBW: # %bb.0: 1253; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1254; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1255; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1256; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1257; AVX512DQNOBW-NEXT: retq 1258 %a = load <8 x i16>,<8 x i16> *%i,align 1 1259 %x = zext <8 x i16> %a to <8 x i64> 1260 %ret = select <8 x i1> 
%mask, <8 x i64> %x, <8 x i64> zeroinitializer 1261 ret <8 x i64> %ret 1262} 1263 1264define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 1265; KNL-LABEL: sext_8x16mem_to_8x64mask: 1266; KNL: # %bb.0: 1267; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1268; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1269; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1270; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1271; KNL-NEXT: retq 1272; 1273; SKX-LABEL: sext_8x16mem_to_8x64mask: 1274; SKX: # %bb.0: 1275; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1276; SKX-NEXT: vpmovw2m %xmm0, %k1 1277; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1278; SKX-NEXT: retq 1279; 1280; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask: 1281; AVX512DQNOBW: # %bb.0: 1282; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1283; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1284; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1285; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} 1286; AVX512DQNOBW-NEXT: retq 1287 %a = load <8 x i16>,<8 x i16> *%i,align 1 1288 %x = sext <8 x i16> %a to <8 x i64> 1289 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1290 ret <8 x i64> %ret 1291} 1292 1293define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { 1294; ALL-LABEL: sext_8x16mem_to_8x64: 1295; ALL: # %bb.0: 1296; ALL-NEXT: vpmovsxwq (%rdi), %zmm0 1297; ALL-NEXT: retq 1298 %a = load <8 x i16>,<8 x i16> *%i,align 1 1299 %x = sext <8 x i16> %a to <8 x i64> 1300 ret <8 x i64> %x 1301} 1302 1303define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 1304; KNL-LABEL: zext_8x16_to_8x64mask: 1305; KNL: # %bb.0: 1306; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 1307; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 1308; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 1309; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1310; KNL-NEXT: retq 1311; 1312; SKX-LABEL: zext_8x16_to_8x64mask: 1313; SKX: # %bb.0: 1314; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 1315; SKX-NEXT: vpmovw2m %xmm1, %k1 1316; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1317; SKX-NEXT: retq 1318; 1319; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask: 1320; AVX512DQNOBW: # %bb.0: 1321; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 1322; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 1323; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 1324; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1325; AVX512DQNOBW-NEXT: retq 1326 %x = zext <8 x i16> %a to <8 x i64> 1327 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1328 ret <8 x i64> %ret 1329} 1330
; Unmasked zero-extension of a register operand from <8 x i16> to <8 x i64>.
; The shared ALL prefix expects a single vpmovzxwq from xmm0 into zmm0.
1331define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { 1332; ALL-LABEL: zext_8x16_to_8x64: 1333; ALL: # %bb.0: 1334; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1335; ALL-NEXT: retq 1336 %ret = zext <8 x i16> %a to <8 x i64> 1337 ret <8 x i64> %ret 1338} 1339
; Masked zero-extending load of <2 x i32> to <2 x i64>, false-%mask lanes
; zeroed. The KNL checks do an unmasked xmm vpmovzxdq and then apply the mask
; with a zmm-wide zero-masked vmovdqa64 (followed by vzeroupper); the AVX512DQ
; checks use vpmovq2m and fold the mask into the xmm vpmovzxdq itself.
1340define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 1341; KNL-LABEL: zext_2x32mem_to_2x64: 1342; KNL: # %bb.0: 1343; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1344; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1345; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 =
mem[0],zero,mem[1],zero 1346; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1347; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1348; KNL-NEXT: vzeroupper 1349; KNL-NEXT: retq 1350; 1351; AVX512DQ-LABEL: zext_2x32mem_to_2x64: 1352; AVX512DQ: # %bb.0: 1353; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1354; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1355; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero 1356; AVX512DQ-NEXT: retq 1357 %a = load <2 x i32>,<2 x i32> *%i,align 1 1358 %x = zext <2 x i32> %a to <2 x i64> 1359 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1360 ret <2 x i64> %ret 1361} 1362
; Masked sign-extending load of <2 x i32> to <2 x i64>, false-%mask lanes
; zeroed. The KNL checks extend unmasked into xmm0 and then mask with a
; zmm-wide vmovdqa64; the AVX512DQ checks fold the mask into vpmovsxdq.
1363define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 1364; KNL-LABEL: sext_2x32mem_to_2x64mask: 1365; KNL: # %bb.0: 1366; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 1367; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1368; KNL-NEXT: vpmovsxdq (%rdi), %xmm0 1369; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1370; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1371; KNL-NEXT: vzeroupper 1372; KNL-NEXT: retq 1373; 1374; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask: 1375; AVX512DQ: # %bb.0: 1376; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 1377; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1 1378; AVX512DQ-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} 1379; AVX512DQ-NEXT: retq 1380 %a = load <2 x i32>,<2 x i32> *%i,align 1 1381 %x = sext <2 x i32> %a to <2 x i64> 1382 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 1383 ret <2 x i64> %ret 1384} 1385
; Unmasked sign-extending load of <2 x i32> to <2 x i64>. The shared ALL prefix
; expects one vpmovsxdq from memory into xmm0.
1386define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { 1387; ALL-LABEL: sext_2x32mem_to_2x64: 1388; ALL: # %bb.0: 1389; ALL-NEXT: vpmovsxdq (%rdi), %xmm0 1390; ALL-NEXT: retq 1391 %a = load <2 x i32>,<2 x i32> *%i,align 1 1392 %x = sext <2 x i32> %a to <2 x i64> 1393 ret <2 x i64> %x 1394} 1395
; Masked zero-extending load of <4 x i32> to <4 x i64>, false-%mask lanes
; zeroed. The KNL checks extend unmasked into ymm0 and then mask with a
; zmm-wide vmovdqa64; the AVX512DQ checks use vpmovd2m and a zero-masked ymm
; vpmovzxdq.
1396define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 1397; KNL-LABEL: zext_4x32mem_to_4x64: 1398; KNL:
# %bb.0: 1399; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1400; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1401; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1402; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1403; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1404; KNL-NEXT: retq 1405; 1406; AVX512DQ-LABEL: zext_4x32mem_to_4x64: 1407; AVX512DQ: # %bb.0: 1408; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1409; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1410; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1411; AVX512DQ-NEXT: retq 1412 %a = load <4 x i32>,<4 x i32> *%i,align 1 1413 %x = zext <4 x i32> %a to <4 x i64> 1414 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1415 ret <4 x i64> %ret 1416} 1417
; Masked sign-extending load of <4 x i32> to <4 x i64>, false-%mask lanes
; zeroed. The KNL checks extend unmasked into ymm0 and mask afterwards with a
; zmm-wide vmovdqa64; the AVX512DQ checks fold the mask into vpmovsxdq.
1418define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 1419; KNL-LABEL: sext_4x32mem_to_4x64mask: 1420; KNL: # %bb.0: 1421; KNL-NEXT: vpslld $31, %xmm0, %xmm0 1422; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1423; KNL-NEXT: vpmovsxdq (%rdi), %ymm0 1424; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1425; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1426; KNL-NEXT: retq 1427; 1428; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask: 1429; AVX512DQ: # %bb.0: 1430; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 1431; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1 1432; AVX512DQ-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} 1433; AVX512DQ-NEXT: retq 1434 %a = load <4 x i32>,<4 x i32> *%i,align 1 1435 %x = sext <4 x i32> %a to <4 x i64> 1436 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1437 ret <4 x i64> %ret 1438} 1439
; Unmasked sign-extending load of <4 x i32> to <4 x i64>. The shared ALL prefix
; expects one vpmovsxdq from memory into ymm0.
1440define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { 1441; ALL-LABEL: sext_4x32mem_to_4x64: 1442; ALL: # %bb.0: 1443; ALL-NEXT: vpmovsxdq (%rdi), %ymm0 1444; ALL-NEXT: retq 1445 %a = load <4 x i32>,<4 x i32> *%i,align 1 1446 %x = sext <4 x i32> %a to <4 x i64> 1447 ret <4 x i64> %x 1448} 1449
; Unmasked sign-extension of a register operand from <4 x i32> to <4 x i64>.
; The shared ALL prefix expects one vpmovsxdq from xmm0 into ymm0.
1450define <4 x
i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { 1451; ALL-LABEL: sext_4x32_to_4x64: 1452; ALL: # %bb.0: 1453; ALL-NEXT: vpmovsxdq %xmm0, %ymm0 1454; ALL-NEXT: retq 1455 %x = sext <4 x i32> %a to <4 x i64> 1456 ret <4 x i64> %x 1457} 1458
; Masked zero-extension of a register operand from <4 x i32> to <4 x i64>,
; false-%mask lanes zeroed. The mask arrives in xmm1. The KNL checks extend
; unmasked and then apply a zmm-wide zero-masked vmovdqa64; the AVX512DQ checks
; use vpmovd2m and a zero-masked vpmovzxdq.
1459define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { 1460; KNL-LABEL: zext_4x32_to_4x64mask: 1461; KNL: # %bb.0: 1462; KNL-NEXT: vpslld $31, %xmm1, %xmm1 1463; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 1464; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1465; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} 1466; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1467; KNL-NEXT: retq 1468; 1469; AVX512DQ-LABEL: zext_4x32_to_4x64mask: 1470; AVX512DQ: # %bb.0: 1471; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1 1472; AVX512DQ-NEXT: vpmovd2m %xmm1, %k1 1473; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1474; AVX512DQ-NEXT: retq 1475 %x = zext <4 x i32> %a to <4 x i64> 1476 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 1477 ret <4 x i64> %ret 1478} 1479
; Masked zero-extending load of <8 x i32> to <8 x i64>, false-%mask lanes
; zeroed. Each configuration converts the <8 x i1> mask to %k1 with its own
; sequence and then expects a zero-masked vpmovzxdq from memory into zmm0.
1480define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 1481; KNL-LABEL: zext_8x32mem_to_8x64: 1482; KNL: # %bb.0: 1483; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1484; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1485; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1486; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1487; KNL-NEXT: retq 1488; 1489; SKX-LABEL: zext_8x32mem_to_8x64: 1490; SKX: # %bb.0: 1491; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1492; SKX-NEXT: vpmovw2m %xmm0, %k1 1493; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1494; SKX-NEXT: retq 1495; 1496; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64: 1497; AVX512DQNOBW: # %bb.0:
1498; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1499; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1500; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1501; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1502; AVX512DQNOBW-NEXT: retq 1503 %a = load <8 x i32>,<8 x i32> *%i,align 1 1504 %x = zext <8 x i32> %a to <8 x i64> 1505 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1506 ret <8 x i64> %ret 1507} 1508
; Masked sign-extending load of <8 x i32> to <8 x i64>, false-%mask lanes
; zeroed. After the per-configuration conversion of %mask into %k1, every
; configuration expects a zero-masked vpmovsxdq from memory into zmm0.
1509define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 1510; KNL-LABEL: sext_8x32mem_to_8x64mask: 1511; KNL: # %bb.0: 1512; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1513; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1514; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 1515; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1516; KNL-NEXT: retq 1517; 1518; SKX-LABEL: sext_8x32mem_to_8x64mask: 1519; SKX: # %bb.0: 1520; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1521; SKX-NEXT: vpmovw2m %xmm0, %k1 1522; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1523; SKX-NEXT: retq 1524; 1525; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask: 1526; AVX512DQNOBW: # %bb.0: 1527; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1528; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1529; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 1530; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} 1531; AVX512DQNOBW-NEXT: retq 1532 %a = load <8 x i32>,<8 x i32> *%i,align 1 1533 %x = sext <8 x i32> %a to <8 x i64> 1534 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1535 ret <8 x i64> %ret 1536} 1537
; Unmasked sign-extending load of <8 x i32> to <8 x i64>. The shared ALL prefix
; expects one vpmovsxdq from memory into zmm0.
1538define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { 1539; ALL-LABEL: sext_8x32mem_to_8x64: 1540; ALL: # %bb.0: 1541; ALL-NEXT: vpmovsxdq (%rdi), %zmm0 1542; ALL-NEXT: retq 1543 %a = load <8 x i32>,<8 x i32> *%i,align 1 1544 %x = sext <8 x i32> %a to <8 x i64> 1545 ret <8 x i64> %x 1546} 1547
; Unmasked sign-extension of a register operand from <8 x i32> to <8 x i64>.
; The shared ALL prefix expects one vpmovsxdq from ymm0 into zmm0.
1548define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind
readnone { 1549; ALL-LABEL: sext_8x32_to_8x64: 1550; ALL: # %bb.0: 1551; ALL-NEXT: vpmovsxdq %ymm0, %zmm0 1552; ALL-NEXT: retq 1553 %x = sext <8 x i32> %a to <8 x i64> 1554 ret <8 x i64> %x 1555} 1556
; Masked zero-extension of a register operand from <8 x i32> to <8 x i64>,
; false-%mask lanes zeroed. The mask arrives in xmm1 and is converted to %k1
; per configuration; the extension is a zero-masked vpmovzxdq from ymm0.
1557define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { 1558; KNL-LABEL: zext_8x32_to_8x64mask: 1559; KNL: # %bb.0: 1560; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 1561; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 1562; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 1563; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 1564; KNL-NEXT: retq 1565; 1566; SKX-LABEL: zext_8x32_to_8x64mask: 1567; SKX: # %bb.0: 1568; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 1569; SKX-NEXT: vpmovw2m %xmm1, %k1 1570; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 1571; SKX-NEXT: retq 1572; 1573; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask: 1574; AVX512DQNOBW: # %bb.0: 1575; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 1576; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 1577; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 1578; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 1579; AVX512DQNOBW-NEXT: retq 1580 %x = zext <8 x i32> %a to <8 x i64> 1581 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 1582 ret <8 x i64> %ret 1583}
; Floating-point truncation of <8 x double> to <8 x float>. The shared ALL
; prefix expects one vcvtpd2ps from zmm0 into ymm0.
1584define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { 1585; ALL-LABEL: fptrunc_test: 1586; ALL: # %bb.0: 1587; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0 1588; ALL-NEXT: retq 1589 %b = fptrunc <8 x double> %a to <8 x float> 1590 ret <8 x float> %b 1591} 1592
; Floating-point extension of <8 x float> to <8 x double>. The shared ALL
; prefix expects one vcvtps2pd from ymm0 into zmm0.
1593define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { 1594; ALL-LABEL: fpext_test: 1595; ALL: # %bb.0: 1596; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 1597; ALL-NEXT: retq 1598
%b = fpext <8 x float> %a to <8 x double> 1599 ret <8 x double> %b 1600} 1601
; Bitcasts the i16 argument to <16 x i1> and zero-extends it to <16 x i32>.
; The KNL checks build the vector with a zero-masked vpternlogd $255, the other
; two configurations with vpmovm2d; all of them finish with vpsrld $31.
1602define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { 1603; KNL-LABEL: zext_16i1_to_16xi32: 1604; KNL: # %bb.0: 1605; KNL-NEXT: kmovw %edi, %k1 1606; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1607; KNL-NEXT: vpsrld $31, %zmm0, %zmm0 1608; KNL-NEXT: retq 1609; 1610; SKX-LABEL: zext_16i1_to_16xi32: 1611; SKX: # %bb.0: 1612; SKX-NEXT: kmovd %edi, %k0 1613; SKX-NEXT: vpmovm2d %k0, %zmm0 1614; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 1615; SKX-NEXT: retq 1616; 1617; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32: 1618; AVX512DQNOBW: # %bb.0: 1619; AVX512DQNOBW-NEXT: kmovw %edi, %k0 1620; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm0 1621; AVX512DQNOBW-NEXT: vpsrld $31, %zmm0, %zmm0 1622; AVX512DQNOBW-NEXT: retq 1623 %a = bitcast i16 %b to <16 x i1> 1624 %c = zext <16 x i1> %a to <16 x i32> 1625 ret <16 x i32> %c 1626} 1627
; Bitcasts the i8 argument to <8 x i1> and zero-extends it to <8 x i64>.
; The KNL checks use a zero-masked vpternlogq $255, the other two
; configurations vpmovm2q; all of them finish with vpsrlq $63.
1628define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { 1629; KNL-LABEL: zext_8i1_to_8xi64: 1630; KNL: # %bb.0: 1631; KNL-NEXT: kmovw %edi, %k1 1632; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1633; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0 1634; KNL-NEXT: retq 1635; 1636; SKX-LABEL: zext_8i1_to_8xi64: 1637; SKX: # %bb.0: 1638; SKX-NEXT: kmovd %edi, %k0 1639; SKX-NEXT: vpmovm2q %k0, %zmm0 1640; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 1641; SKX-NEXT: retq 1642; 1643; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64: 1644; AVX512DQNOBW: # %bb.0: 1645; AVX512DQNOBW-NEXT: kmovw %edi, %k0 1646; AVX512DQNOBW-NEXT: vpmovm2q %k0, %zmm0 1647; AVX512DQNOBW-NEXT: vpsrlq $63, %zmm0, %zmm0 1648; AVX512DQNOBW-NEXT: retq 1649 %a = bitcast i8 %b to <8 x i1> 1650 %c = zext <8 x i1> %a to <8 x i64> 1651 ret <8 x i64> %c 1652} 1653
; Truncates <16 x i8> to <16 x i1> and bitcasts the mask to i16. The shared ALL
; prefix expects vpsllw $7 followed by vpmovmskb; no k-register appears in the
; expected output.
1654define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { 1655; ALL-LABEL: trunc_16i8_to_16i1: 1656; ALL: # %bb.0: 1657; ALL-NEXT: vpsllw $7, %xmm0, %xmm0 1658; ALL-NEXT: vpmovmskb %xmm0, %eax 1659; ALL-NEXT: # kill: def $ax killed $ax killed $eax 1660; ALL-NEXT: retq 1661 %mask_b = trunc <16 x
i8>%a to <16 x i1> 1662 %mask = bitcast <16 x i1> %mask_b to i16 1663 ret i16 %mask 1664} 1665
; Truncates <16 x i32> to <16 x i1> and bitcasts the mask to i16. All
; configurations shift with vpslld $31; the KNL checks then use vptestmd while
; the other two use vpmovd2m, and the k-register is moved to %eax.
1666define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { 1667; KNL-LABEL: trunc_16i32_to_16i1: 1668; KNL: # %bb.0: 1669; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1670; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1671; KNL-NEXT: kmovw %k0, %eax 1672; KNL-NEXT: # kill: def $ax killed $ax killed $eax 1673; KNL-NEXT: vzeroupper 1674; KNL-NEXT: retq 1675; 1676; SKX-LABEL: trunc_16i32_to_16i1: 1677; SKX: # %bb.0: 1678; SKX-NEXT: vpslld $31, %zmm0, %zmm0 1679; SKX-NEXT: vpmovd2m %zmm0, %k0 1680; SKX-NEXT: kmovd %k0, %eax 1681; SKX-NEXT: # kill: def $ax killed $ax killed $eax 1682; SKX-NEXT: vzeroupper 1683; SKX-NEXT: retq 1684; 1685; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1: 1686; AVX512DQNOBW: # %bb.0: 1687; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 1688; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0 1689; AVX512DQNOBW-NEXT: kmovw %k0, %eax 1690; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax 1691; AVX512DQNOBW-NEXT: vzeroupper 1692; AVX512DQNOBW-NEXT: retq 1693 %mask_b = trunc <16 x i32>%a to <16 x i1> 1694 %mask = bitcast <16 x i1> %mask_b to i16 1695 ret i16 %mask 1696} 1697
; Truncates %a and %b to <4 x i1>, ANDs the two masks and sign-extends the
; result back to <4 x i32>. The shared ALL prefix expects the AND on the full
; vectors (vpand) followed by a vpslld/vpsrad $31 pair.
1698define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { 1699; ALL-LABEL: trunc_4i32_to_4i1: 1700; ALL: # %bb.0: 1701; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0 1702; ALL-NEXT: vpslld $31, %xmm0, %xmm0 1703; ALL-NEXT: vpsrad $31, %xmm0, %xmm0 1704; ALL-NEXT: retq 1705 %mask_a = trunc <4 x i32>%a to <4 x i1> 1706 %mask_b = trunc <4 x i32>%b to <4 x i1> 1707 %a_and_b = and <4 x i1>%mask_a, %mask_b 1708 %res = sext <4 x i1>%a_and_b to <4 x i32> 1709 ret <4 x i32>%res 1710} 1711 1712
; Truncates <8 x i16> to <8 x i1> and bitcasts the mask to i8. The KNL checks
; widen to zmm (vpmovsxwq) before vptestmq, the SKX checks use vpmovw2m
; directly, and the AVX512DQNOBW checks widen to ymm (vpmovsxwd) before
; vpmovd2m.
1713define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { 1714; KNL-LABEL: trunc_8i16_to_8i1: 1715; KNL: # %bb.0: 1716; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 1717; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 1718; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 1719; KNL-NEXT: kmovw %k0, %eax 1720; KNL-NEXT: # kill: def $al killed $al killed $eax 1721; KNL-NEXT:
vzeroupper 1722; KNL-NEXT: retq 1723; 1724; SKX-LABEL: trunc_8i16_to_8i1: 1725; SKX: # %bb.0: 1726; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 1727; SKX-NEXT: vpmovw2m %xmm0, %k0 1728; SKX-NEXT: kmovd %k0, %eax 1729; SKX-NEXT: # kill: def $al killed $al killed $eax 1730; SKX-NEXT: retq 1731; 1732; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1: 1733; AVX512DQNOBW: # %bb.0: 1734; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 1735; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 1736; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0 1737; AVX512DQNOBW-NEXT: kmovw %k0, %eax 1738; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax 1739; AVX512DQNOBW-NEXT: vzeroupper 1740; AVX512DQNOBW-NEXT: retq 1741 %mask_b = trunc <8 x i16>%a to <8 x i1> 1742 %mask = bitcast <8 x i1> %mask_b to i8 1743 ret i8 %mask 1744} 1745
; Compares %a1 slt %a2, inverts the <8 x i1> result by xor with all-true and
; sign-extends it to <8 x i32>. Expected output is vpcmpgtd followed by
; vpternlogq $15, zmm-wide in the KNL checks and ymm-wide in the AVX512DQ
; checks.
1746define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1747; KNL-LABEL: sext_8i1_8i32: 1748; KNL: # %bb.0: 1749; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1750; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 1751; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1752; KNL-NEXT: retq 1753; 1754; AVX512DQ-LABEL: sext_8i1_8i32: 1755; AVX512DQ: # %bb.0: 1756; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1757; AVX512DQ-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 1758; AVX512DQ-NEXT: retq 1759 %x = icmp slt <8 x i32> %a1, %a2 1760 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 1761 %y = sext <8 x i1> %x1 to <8 x i32> 1762 ret <8 x i32> %y 1763} 1764 1765
; Truncates %a to i1, inserts it as element 0 of a constant <16 x i1> (element
; 1 false, all others true) and bitcasts the vector to i16. The checks expect
; the masked bit (andl $1) to be OR-ed into a k-register built from the
; constant -4 and the result moved back into %ax.
1766define i16 @trunc_i32_to_i1(i32 %a) { 1767; KNL-LABEL: trunc_i32_to_i1: 1768; KNL: # %bb.0: 1769; KNL-NEXT: andl $1, %edi 1770; KNL-NEXT: kmovw %edi, %k0 1771; KNL-NEXT: movw $-4, %ax 1772; KNL-NEXT: kmovw %eax, %k1 1773; KNL-NEXT: kshiftrw $1, %k1, %k1 1774; KNL-NEXT: kshiftlw $1, %k1, %k1 1775; KNL-NEXT: korw %k0, %k1, %k0 1776; KNL-NEXT: kmovw %k0, %eax 1777; KNL-NEXT: # kill: def $ax killed $ax killed $eax 1778; KNL-NEXT: retq 1779; 1780; SKX-LABEL: trunc_i32_to_i1:
1781; SKX: # %bb.0: 1782; SKX-NEXT: andl $1, %edi 1783; SKX-NEXT: kmovw %edi, %k0 1784; SKX-NEXT: movw $-4, %ax 1785; SKX-NEXT: kmovd %eax, %k1 1786; SKX-NEXT: kshiftrw $1, %k1, %k1 1787; SKX-NEXT: kshiftlw $1, %k1, %k1 1788; SKX-NEXT: korw %k0, %k1, %k0 1789; SKX-NEXT: kmovd %k0, %eax 1790; SKX-NEXT: # kill: def $ax killed $ax killed $eax 1791; SKX-NEXT: retq 1792; 1793; AVX512DQNOBW-LABEL: trunc_i32_to_i1: 1794; AVX512DQNOBW: # %bb.0: 1795; AVX512DQNOBW-NEXT: andl $1, %edi 1796; AVX512DQNOBW-NEXT: kmovw %edi, %k0 1797; AVX512DQNOBW-NEXT: movw $-4, %ax 1798; AVX512DQNOBW-NEXT: kmovw %eax, %k1 1799; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 1800; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 1801; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0 1802; AVX512DQNOBW-NEXT: kmovw %k0, %eax 1803; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax 1804; AVX512DQNOBW-NEXT: retq 1805 %a_i = trunc i32 %a to i1 1806 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0 1807 %res = bitcast <16 x i1> %maskv to i16 1808 ret i16 %res 1809} 1810
; Compares %a1 slt %a2 on <8 x i32> and sign-extends the <8 x i1> result to
; <8 x i16>. The KNL checks keep the compare result in ymm0 and narrow with
; vpmovdw; the SKX checks compare into %k0 and use vpmovm2w; the AVX512DQNOBW
; checks compare into %k0, expand with vpmovm2d and narrow with vpmovdw.
1811define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1812; KNL-LABEL: sext_8i1_8i16: 1813; KNL: # %bb.0: 1814; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1815; KNL-NEXT: vpmovdw %zmm0, %ymm0 1816; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1817; KNL-NEXT: vzeroupper 1818; KNL-NEXT: retq 1819; 1820; SKX-LABEL: sext_8i1_8i16: 1821; SKX: # %bb.0: 1822; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1823; SKX-NEXT: vpmovm2w %k0, %xmm0 1824; SKX-NEXT: vzeroupper 1825; SKX-NEXT: retq 1826; 1827; AVX512DQNOBW-LABEL: sext_8i1_8i16: 1828; AVX512DQNOBW: # %bb.0: 1829; AVX512DQNOBW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1830; AVX512DQNOBW-NEXT: vpmovm2d %k0, %ymm0 1831; AVX512DQNOBW-NEXT: vpmovdw %ymm0, %xmm0 1832; AVX512DQNOBW-NEXT: vzeroupper 1833; AVX512DQNOBW-NEXT: retq 1834 %x = icmp slt <8 x i32>
%a1, %a2 1835 %y = sext <8 x i1> %x to <8 x i16> 1836 ret <8 x i16> %y 1837} 1838
; Compares %a1 slt %a2 on <16 x i32> and sign-extends the <16 x i1> result to
; <16 x i32>. The KNL checks compare into %k1 and use a zero-masked
; vpternlogd $255; the AVX512DQ checks compare into %k0 and use vpmovm2d.
1839define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { 1840; KNL-LABEL: sext_16i1_16i32: 1841; KNL: # %bb.0: 1842; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1843; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1844; KNL-NEXT: retq 1845; 1846; AVX512DQ-LABEL: sext_16i1_16i32: 1847; AVX512DQ: # %bb.0: 1848; AVX512DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 1849; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 1850; AVX512DQ-NEXT: retq 1851 %x = icmp slt <16 x i32> %a1, %a2 1852 %y = sext <16 x i1> %x to <16 x i32> 1853 ret <16 x i32> %y 1854} 1855
; Compares %a1 slt %a2 on <8 x i32> and sign-extends the <8 x i1> result to
; <8 x i64>. The KNL checks keep the compare result in ymm0 and widen it with
; vpmovsxdq; the AVX512DQ checks compare into %k0 and use vpmovm2q.
1856define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { 1857; KNL-LABEL: sext_8i1_8i64: 1858; KNL: # %bb.0: 1859; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1860; KNL-NEXT: vpmovsxdq %ymm0, %zmm0 1861; KNL-NEXT: retq 1862; 1863; AVX512DQ-LABEL: sext_8i1_8i64: 1864; AVX512DQ: # %bb.0: 1865; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 1866; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 1867; AVX512DQ-NEXT: retq 1868 %x = icmp slt <8 x i32> %a1, %a2 1869 %y = sext <8 x i1> %x to <8 x i64> 1870 ret <8 x i64> %y 1871} 1872
; Loads <8 x i8> from %a, sign-extends it to <8 x i64> and stores the result
; through %res. The shared ALL prefix expects vpmovsxbq from memory, a
; vmovdqa64 store and vzeroupper before the return.
1873define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { 1874; ALL-LABEL: extload_v8i64: 1875; ALL: # %bb.0: 1876; ALL-NEXT: vpmovsxbq (%rdi), %zmm0 1877; ALL-NEXT: vmovdqa64 %zmm0, (%rsi) 1878; ALL-NEXT: vzeroupper 1879; ALL-NEXT: retq 1880 %sign_load = load <8 x i8>, <8 x i8>* %a 1881 %c = sext <8 x i8> %sign_load to <8 x i64> 1882 store <8 x i64> %c, <8 x i64>* %res 1883 ret void 1884} 1885 1886define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { 1887; KNL-LABEL: test21: 1888; KNL: # %bb.0: 1889; KNL-NEXT: movw $-3, %ax 1890; KNL-NEXT: kmovw %eax, %k1 1891; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1892; KNL-NEXT: kmovw %eax, %k0 1893; KNL-NEXT: kandw %k1, %k0, %k0 1894; KNL-NEXT: kmovw %k1, %k2 1895; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1896; KNL-NEXT: movb
{{[0-9]+}}(%rsp), %al 1897; KNL-NEXT: kmovw %eax, %k1 1898; KNL-NEXT: kshiftlw $15, %k1, %k1 1899; KNL-NEXT: kshiftrw $14, %k1, %k1 1900; KNL-NEXT: korw %k1, %k0, %k0 1901; KNL-NEXT: movw $-5, %ax 1902; KNL-NEXT: kmovw %eax, %k1 1903; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1904; KNL-NEXT: kandw %k1, %k0, %k0 1905; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1906; KNL-NEXT: kmovw %eax, %k1 1907; KNL-NEXT: kshiftlw $15, %k1, %k1 1908; KNL-NEXT: kshiftrw $13, %k1, %k1 1909; KNL-NEXT: korw %k1, %k0, %k0 1910; KNL-NEXT: movw $-9, %ax 1911; KNL-NEXT: kmovw %eax, %k1 1912; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1913; KNL-NEXT: kandw %k1, %k0, %k0 1914; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1915; KNL-NEXT: kmovw %eax, %k1 1916; KNL-NEXT: kshiftlw $15, %k1, %k1 1917; KNL-NEXT: kshiftrw $12, %k1, %k1 1918; KNL-NEXT: korw %k1, %k0, %k0 1919; KNL-NEXT: movw $-17, %ax 1920; KNL-NEXT: kmovw %eax, %k6 1921; KNL-NEXT: kandw %k6, %k0, %k0 1922; KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1923; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1924; KNL-NEXT: kmovw %eax, %k1 1925; KNL-NEXT: kshiftlw $15, %k1, %k1 1926; KNL-NEXT: kshiftrw $11, %k1, %k1 1927; KNL-NEXT: korw %k1, %k0, %k0 1928; KNL-NEXT: movw $-33, %ax 1929; KNL-NEXT: kmovw %eax, %k1 1930; KNL-NEXT: kandw %k1, %k0, %k0 1931; KNL-NEXT: kmovw %k1, %k3 1932; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1933; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1934; KNL-NEXT: kmovw %eax, %k1 1935; KNL-NEXT: kshiftlw $15, %k1, %k1 1936; KNL-NEXT: kshiftrw $10, %k1, %k1 1937; KNL-NEXT: korw %k1, %k0, %k0 1938; KNL-NEXT: movw $-65, %ax 1939; KNL-NEXT: kmovw %eax, %k1 1940; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1941; KNL-NEXT: kandw %k1, %k0, %k0 1942; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1943; KNL-NEXT: kmovw %eax, %k1 1944; KNL-NEXT: kshiftlw $15, %k1, %k1 1945; KNL-NEXT: kshiftrw $9, %k1, %k1 1946; KNL-NEXT: korw %k1, %k0, %k0 1947; KNL-NEXT: movw $-129, %ax 
1948; KNL-NEXT: kmovw %eax, %k1 1949; KNL-NEXT: kandw %k1, %k0, %k0 1950; KNL-NEXT: kmovw %k1, %k4 1951; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1952; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1953; KNL-NEXT: kmovw %eax, %k1 1954; KNL-NEXT: kshiftlw $15, %k1, %k1 1955; KNL-NEXT: kshiftrw $8, %k1, %k1 1956; KNL-NEXT: korw %k1, %k0, %k0 1957; KNL-NEXT: movw $-257, %ax # imm = 0xFEFF 1958; KNL-NEXT: kmovw %eax, %k1 1959; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1960; KNL-NEXT: kandw %k1, %k0, %k0 1961; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1962; KNL-NEXT: kmovw %eax, %k1 1963; KNL-NEXT: kshiftlw $15, %k1, %k1 1964; KNL-NEXT: kshiftrw $7, %k1, %k1 1965; KNL-NEXT: korw %k1, %k0, %k0 1966; KNL-NEXT: movw $-513, %ax # imm = 0xFDFF 1967; KNL-NEXT: kmovw %eax, %k1 1968; KNL-NEXT: kandw %k1, %k0, %k0 1969; KNL-NEXT: kmovw %k1, %k5 1970; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1971; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1972; KNL-NEXT: kmovw %eax, %k1 1973; KNL-NEXT: kshiftlw $15, %k1, %k1 1974; KNL-NEXT: kshiftrw $6, %k1, %k1 1975; KNL-NEXT: korw %k1, %k0, %k0 1976; KNL-NEXT: movw $-1025, %ax # imm = 0xFBFF 1977; KNL-NEXT: kmovw %eax, %k1 1978; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1979; KNL-NEXT: kandw %k1, %k0, %k0 1980; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1981; KNL-NEXT: kmovw %eax, %k1 1982; KNL-NEXT: kshiftlw $15, %k1, %k1 1983; KNL-NEXT: kshiftrw $5, %k1, %k1 1984; KNL-NEXT: korw %k1, %k0, %k0 1985; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF 1986; KNL-NEXT: kmovw %eax, %k1 1987; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 1988; KNL-NEXT: kandw %k1, %k0, %k0 1989; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1990; KNL-NEXT: kmovw %eax, %k1 1991; KNL-NEXT: kshiftlw $15, %k1, %k1 1992; KNL-NEXT: kshiftrw $4, %k1, %k1 1993; KNL-NEXT: korw %k1, %k0, %k0 1994; KNL-NEXT: movw $-4097, %ax # imm = 0xEFFF 1995; KNL-NEXT: kmovw %eax, %k1 1996; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 
2-byte Spill 1997; KNL-NEXT: kandw %k1, %k0, %k0 1998; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 1999; KNL-NEXT: kmovw %eax, %k1 2000; KNL-NEXT: kshiftlw $15, %k1, %k1 2001; KNL-NEXT: kshiftrw $3, %k1, %k1 2002; KNL-NEXT: korw %k1, %k0, %k0 2003; KNL-NEXT: movw $-8193, %ax # imm = 0xDFFF 2004; KNL-NEXT: kmovw %eax, %k1 2005; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2006; KNL-NEXT: kandw %k1, %k0, %k0 2007; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2008; KNL-NEXT: kmovw %eax, %k1 2009; KNL-NEXT: kshiftlw $15, %k1, %k1 2010; KNL-NEXT: kshiftrw $2, %k1, %k1 2011; KNL-NEXT: korw %k1, %k0, %k1 2012; KNL-NEXT: movw $-16385, %ax # imm = 0xBFFF 2013; KNL-NEXT: kmovw %eax, %k0 2014; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2015; KNL-NEXT: kandw %k0, %k1, %k1 2016; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2017; KNL-NEXT: kmovw %eax, %k7 2018; KNL-NEXT: kshiftlw $14, %k7, %k7 2019; KNL-NEXT: korw %k7, %k1, %k1 2020; KNL-NEXT: kshiftlw $1, %k1, %k1 2021; KNL-NEXT: kshiftrw $1, %k1, %k1 2022; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2023; KNL-NEXT: kmovw %eax, %k7 2024; KNL-NEXT: kshiftlw $15, %k7, %k7 2025; KNL-NEXT: korw %k7, %k1, %k1 2026; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2027; KNL-NEXT: kmovw %edi, %k1 2028; KNL-NEXT: kandw %k2, %k1, %k1 2029; KNL-NEXT: kmovw %esi, %k7 2030; KNL-NEXT: kshiftlw $15, %k7, %k7 2031; KNL-NEXT: kshiftrw $14, %k7, %k7 2032; KNL-NEXT: korw %k7, %k1, %k1 2033; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2034; KNL-NEXT: kandw %k0, %k1, %k1 2035; KNL-NEXT: kmovw %edx, %k7 2036; KNL-NEXT: kshiftlw $15, %k7, %k7 2037; KNL-NEXT: kshiftrw $13, %k7, %k7 2038; KNL-NEXT: korw %k7, %k1, %k1 2039; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2040; KNL-NEXT: kandw %k2, %k1, %k1 2041; KNL-NEXT: kmovw %ecx, %k7 2042; KNL-NEXT: kshiftlw $15, %k7, %k7 2043; KNL-NEXT: kshiftrw $12, %k7, %k7 2044; KNL-NEXT: korw %k7, %k1, %k1 2045; KNL-NEXT: kandw %k6, %k1, %k1 2046; KNL-NEXT: kmovw 
%r8d, %k7 2047; KNL-NEXT: kshiftlw $15, %k7, %k7 2048; KNL-NEXT: kshiftrw $11, %k7, %k7 2049; KNL-NEXT: korw %k7, %k1, %k1 2050; KNL-NEXT: kandw %k3, %k1, %k1 2051; KNL-NEXT: kmovw %r9d, %k7 2052; KNL-NEXT: kshiftlw $15, %k7, %k7 2053; KNL-NEXT: kshiftrw $10, %k7, %k7 2054; KNL-NEXT: korw %k7, %k1, %k1 2055; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2056; KNL-NEXT: kandw %k6, %k1, %k1 2057; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2058; KNL-NEXT: kmovw %eax, %k7 2059; KNL-NEXT: kshiftlw $15, %k7, %k7 2060; KNL-NEXT: kshiftrw $9, %k7, %k7 2061; KNL-NEXT: korw %k7, %k1, %k1 2062; KNL-NEXT: kandw %k4, %k1, %k1 2063; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2064; KNL-NEXT: kmovw %eax, %k7 2065; KNL-NEXT: kshiftlw $15, %k7, %k7 2066; KNL-NEXT: kshiftrw $8, %k7, %k7 2067; KNL-NEXT: korw %k7, %k1, %k1 2068; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2069; KNL-NEXT: kandw %k3, %k1, %k1 2070; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2071; KNL-NEXT: kmovw %eax, %k7 2072; KNL-NEXT: kshiftlw $15, %k7, %k7 2073; KNL-NEXT: kshiftrw $7, %k7, %k7 2074; KNL-NEXT: korw %k7, %k1, %k1 2075; KNL-NEXT: kandw %k5, %k1, %k1 2076; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2077; KNL-NEXT: kmovw %eax, %k7 2078; KNL-NEXT: kshiftlw $15, %k7, %k7 2079; KNL-NEXT: kshiftrw $6, %k7, %k7 2080; KNL-NEXT: korw %k7, %k1, %k1 2081; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload 2082; KNL-NEXT: kandw %k4, %k1, %k1 2083; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2084; KNL-NEXT: kmovw %eax, %k7 2085; KNL-NEXT: kshiftlw $15, %k7, %k7 2086; KNL-NEXT: kshiftrw $5, %k7, %k7 2087; KNL-NEXT: korw %k7, %k1, %k1 2088; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2089; KNL-NEXT: kandw %k5, %k1, %k1 2090; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2091; KNL-NEXT: kmovw %eax, %k7 2092; KNL-NEXT: kshiftlw $15, %k7, %k7 2093; KNL-NEXT: kshiftrw $4, %k7, %k7 2094; KNL-NEXT: korw %k7, %k1, %k1 2095; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2096; KNL-NEXT: 
kandw %k7, %k1, %k1 2097; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2098; KNL-NEXT: kmovw %eax, %k7 2099; KNL-NEXT: kshiftlw $15, %k7, %k7 2100; KNL-NEXT: kshiftrw $3, %k7, %k7 2101; KNL-NEXT: korw %k7, %k1, %k1 2102; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2103; KNL-NEXT: kandw %k7, %k1, %k1 2104; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2105; KNL-NEXT: kmovw %eax, %k7 2106; KNL-NEXT: kshiftlw $15, %k7, %k7 2107; KNL-NEXT: kshiftrw $2, %k7, %k7 2108; KNL-NEXT: korw %k7, %k1, %k1 2109; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2110; KNL-NEXT: kandw %k7, %k1, %k1 2111; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2112; KNL-NEXT: kmovw %eax, %k7 2113; KNL-NEXT: kshiftlw $14, %k7, %k7 2114; KNL-NEXT: korw %k7, %k1, %k1 2115; KNL-NEXT: kshiftlw $1, %k1, %k1 2116; KNL-NEXT: kshiftrw $1, %k1, %k1 2117; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2118; KNL-NEXT: kmovw %eax, %k7 2119; KNL-NEXT: kshiftlw $15, %k7, %k7 2120; KNL-NEXT: korw %k7, %k1, %k1 2121; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2122; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2123; KNL-NEXT: kmovw %eax, %k1 2124; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2125; KNL-NEXT: kandw %k7, %k1, %k1 2126; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2127; KNL-NEXT: kmovw %eax, %k7 2128; KNL-NEXT: kshiftlw $15, %k7, %k7 2129; KNL-NEXT: kshiftrw $14, %k7, %k7 2130; KNL-NEXT: korw %k7, %k1, %k1 2131; KNL-NEXT: kandw %k0, %k1, %k1 2132; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2133; KNL-NEXT: kmovw %eax, %k7 2134; KNL-NEXT: kshiftlw $15, %k7, %k7 2135; KNL-NEXT: kshiftrw $13, %k7, %k7 2136; KNL-NEXT: korw %k7, %k1, %k1 2137; KNL-NEXT: kandw %k2, %k1, %k1 2138; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2139; KNL-NEXT: kmovw %eax, %k7 2140; KNL-NEXT: kshiftlw $15, %k7, %k7 2141; KNL-NEXT: kshiftrw $12, %k7, %k7 2142; KNL-NEXT: korw %k7, %k1, %k1 2143; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2144; KNL-NEXT: kandw %k0, %k1, %k1 2145; KNL-NEXT: movb 
{{[0-9]+}}(%rsp), %al 2146; KNL-NEXT: kmovw %eax, %k7 2147; KNL-NEXT: kshiftlw $15, %k7, %k7 2148; KNL-NEXT: kshiftrw $11, %k7, %k7 2149; KNL-NEXT: korw %k7, %k1, %k1 2150; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2151; KNL-NEXT: kandw %k2, %k1, %k1 2152; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2153; KNL-NEXT: kmovw %eax, %k7 2154; KNL-NEXT: kshiftlw $15, %k7, %k7 2155; KNL-NEXT: kshiftrw $10, %k7, %k7 2156; KNL-NEXT: korw %k7, %k1, %k1 2157; KNL-NEXT: kandw %k6, %k1, %k1 2158; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2159; KNL-NEXT: kmovw %eax, %k7 2160; KNL-NEXT: kshiftlw $15, %k7, %k7 2161; KNL-NEXT: kshiftrw $9, %k7, %k7 2162; KNL-NEXT: korw %k7, %k1, %k1 2163; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2164; KNL-NEXT: kandw %k2, %k1, %k1 2165; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2166; KNL-NEXT: kmovw %eax, %k7 2167; KNL-NEXT: kshiftlw $15, %k7, %k7 2168; KNL-NEXT: kshiftrw $8, %k7, %k7 2169; KNL-NEXT: korw %k7, %k1, %k1 2170; KNL-NEXT: kandw %k3, %k1, %k1 2171; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2172; KNL-NEXT: kmovw %eax, %k7 2173; KNL-NEXT: kshiftlw $15, %k7, %k7 2174; KNL-NEXT: kshiftrw $7, %k7, %k7 2175; KNL-NEXT: korw %k7, %k1, %k1 2176; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2177; KNL-NEXT: kandw %k3, %k1, %k1 2178; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2179; KNL-NEXT: kmovw %eax, %k7 2180; KNL-NEXT: kshiftlw $15, %k7, %k7 2181; KNL-NEXT: kshiftrw $6, %k7, %k7 2182; KNL-NEXT: korw %k7, %k1, %k1 2183; KNL-NEXT: kandw %k4, %k1, %k1 2184; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2185; KNL-NEXT: kmovw %eax, %k7 2186; KNL-NEXT: kshiftlw $15, %k7, %k7 2187; KNL-NEXT: kshiftrw $5, %k7, %k7 2188; KNL-NEXT: korw %k7, %k1, %k1 2189; KNL-NEXT: kandw %k5, %k1, %k1 2190; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2191; KNL-NEXT: kmovw %eax, %k7 2192; KNL-NEXT: kshiftlw $15, %k7, %k7 2193; KNL-NEXT: kshiftrw $4, %k7, %k7 2194; KNL-NEXT: korw %k7, %k1, %k1 2195; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte 
Reload 2196; KNL-NEXT: kandw %k2, %k1, %k1 2197; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2198; KNL-NEXT: kmovw %eax, %k7 2199; KNL-NEXT: kshiftlw $15, %k7, %k7 2200; KNL-NEXT: kshiftrw $3, %k7, %k7 2201; KNL-NEXT: korw %k7, %k1, %k1 2202; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2203; KNL-NEXT: kandw %k5, %k1, %k1 2204; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2205; KNL-NEXT: kmovw %eax, %k7 2206; KNL-NEXT: kshiftlw $15, %k7, %k7 2207; KNL-NEXT: kshiftrw $2, %k7, %k7 2208; KNL-NEXT: korw %k7, %k1, %k1 2209; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2210; KNL-NEXT: kandw %k5, %k1, %k1 2211; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2212; KNL-NEXT: kmovw %eax, %k7 2213; KNL-NEXT: kshiftlw $14, %k7, %k7 2214; KNL-NEXT: korw %k7, %k1, %k1 2215; KNL-NEXT: kshiftlw $1, %k1, %k1 2216; KNL-NEXT: kshiftrw $1, %k1, %k1 2217; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2218; KNL-NEXT: kmovw %eax, %k7 2219; KNL-NEXT: kshiftlw $15, %k7, %k7 2220; KNL-NEXT: korw %k7, %k1, %k1 2221; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2222; KNL-NEXT: kmovw %eax, %k7 2223; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2224; KNL-NEXT: kandw %k5, %k7, %k7 2225; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2226; KNL-NEXT: kmovw %eax, %k6 2227; KNL-NEXT: kshiftlw $15, %k6, %k6 2228; KNL-NEXT: kshiftrw $14, %k6, %k6 2229; KNL-NEXT: korw %k6, %k7, %k6 2230; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2231; KNL-NEXT: kandw %k5, %k6, %k6 2232; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2233; KNL-NEXT: kmovw %eax, %k7 2234; KNL-NEXT: kshiftlw $15, %k7, %k7 2235; KNL-NEXT: kshiftrw $13, %k7, %k7 2236; KNL-NEXT: korw %k7, %k6, %k6 2237; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2238; KNL-NEXT: kandw %k5, %k6, %k6 2239; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2240; KNL-NEXT: kmovw %eax, %k7 2241; KNL-NEXT: kshiftlw $15, %k7, %k7 2242; KNL-NEXT: kshiftrw $12, %k7, %k7 2243; KNL-NEXT: korw %k7, %k6, %k6 2244; KNL-NEXT: kandw %k0, %k6, %k6 2245; 
KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2246; KNL-NEXT: kmovw %eax, %k7 2247; KNL-NEXT: kshiftlw $15, %k7, %k7 2248; KNL-NEXT: kshiftrw $11, %k7, %k7 2249; KNL-NEXT: korw %k7, %k6, %k6 2250; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2251; KNL-NEXT: kandw %k0, %k6, %k6 2252; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2253; KNL-NEXT: kmovw %eax, %k7 2254; KNL-NEXT: kshiftlw $15, %k7, %k7 2255; KNL-NEXT: kshiftrw $10, %k7, %k7 2256; KNL-NEXT: korw %k7, %k6, %k6 2257; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2258; KNL-NEXT: kandw %k0, %k6, %k6 2259; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2260; KNL-NEXT: kmovw %eax, %k7 2261; KNL-NEXT: kshiftlw $15, %k7, %k7 2262; KNL-NEXT: kshiftrw $9, %k7, %k7 2263; KNL-NEXT: korw %k7, %k6, %k6 2264; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2265; KNL-NEXT: kandw %k0, %k6, %k6 2266; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2267; KNL-NEXT: kmovw %eax, %k7 2268; KNL-NEXT: kshiftlw $15, %k7, %k7 2269; KNL-NEXT: kshiftrw $8, %k7, %k7 2270; KNL-NEXT: korw %k7, %k6, %k6 2271; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2272; KNL-NEXT: kandw %k0, %k6, %k6 2273; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2274; KNL-NEXT: kmovw %eax, %k7 2275; KNL-NEXT: kshiftlw $15, %k7, %k7 2276; KNL-NEXT: kshiftrw $7, %k7, %k7 2277; KNL-NEXT: korw %k7, %k6, %k6 2278; KNL-NEXT: kandw %k3, %k6, %k6 2279; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2280; KNL-NEXT: kmovw %eax, %k7 2281; KNL-NEXT: kshiftlw $15, %k7, %k7 2282; KNL-NEXT: kshiftrw $6, %k7, %k7 2283; KNL-NEXT: korw %k7, %k6, %k6 2284; KNL-NEXT: kandw %k4, %k6, %k5 2285; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2286; KNL-NEXT: kmovw %eax, %k6 2287; KNL-NEXT: kshiftlw $15, %k6, %k6 2288; KNL-NEXT: kshiftrw $5, %k6, %k6 2289; KNL-NEXT: korw %k6, %k5, %k5 2290; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2291; KNL-NEXT: kandw %k0, %k5, %k4 2292; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2293; KNL-NEXT: kmovw %eax, %k5 2294; KNL-NEXT: kshiftlw 
$15, %k5, %k5 2295; KNL-NEXT: kshiftrw $4, %k5, %k5 2296; KNL-NEXT: korw %k5, %k4, %k4 2297; KNL-NEXT: kandw %k2, %k4, %k3 2298; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2299; KNL-NEXT: kmovw %eax, %k4 2300; KNL-NEXT: kshiftlw $15, %k4, %k4 2301; KNL-NEXT: kshiftrw $3, %k4, %k4 2302; KNL-NEXT: korw %k4, %k3, %k3 2303; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2304; KNL-NEXT: kandw %k0, %k3, %k2 2305; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2306; KNL-NEXT: kmovw %eax, %k3 2307; KNL-NEXT: kshiftlw $15, %k3, %k3 2308; KNL-NEXT: kshiftrw $2, %k3, %k3 2309; KNL-NEXT: korw %k3, %k2, %k2 2310; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2311; KNL-NEXT: kandw %k0, %k2, %k0 2312; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2313; KNL-NEXT: kmovw %eax, %k2 2314; KNL-NEXT: kshiftlw $14, %k2, %k2 2315; KNL-NEXT: korw %k2, %k0, %k0 2316; KNL-NEXT: kshiftlw $1, %k0, %k0 2317; KNL-NEXT: kshiftrw $1, %k0, %k0 2318; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al 2319; KNL-NEXT: kmovw %eax, %k2 2320; KNL-NEXT: kshiftlw $15, %k2, %k2 2321; KNL-NEXT: korw %k2, %k0, %k2 2322; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} 2323; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} 2324; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2325; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} 2326; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2327; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} 2328; KNL-NEXT: vpmovdw %zmm2, %ymm2 2329; KNL-NEXT: vpmovdw %zmm3, %ymm3 2330; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2331; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1 2332; KNL-NEXT: vpmovdw %zmm4, %ymm2 2333; KNL-NEXT: vpmovdw %zmm5, %ymm3 2334; KNL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2335; KNL-NEXT: vpandq %zmm0, %zmm2, %zmm0 2336; KNL-NEXT: retq 2337; 2338; SKX-LABEL: test21: 2339; SKX: # %bb.0: 2340; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 2341; SKX-NEXT: vpmovb2m %zmm2, %k1 2342; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} 
{z} 2343; SKX-NEXT: kshiftrq $32, %k1, %k1 2344; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} 2345; SKX-NEXT: retq 2346; 2347; AVX512DQNOBW-LABEL: test21: 2348; AVX512DQNOBW: # %bb.0: 2349; AVX512DQNOBW-NEXT: movw $-3, %ax 2350; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2351; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2352; AVX512DQNOBW-NEXT: kmovw %eax, %k0 2353; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2354; AVX512DQNOBW-NEXT: kmovw %k1, %k2 2355; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2356; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2357; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2358; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2359; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 2360; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2361; AVX512DQNOBW-NEXT: movw $-5, %ax 2362; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2363; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2364; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2365; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2366; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2367; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2368; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1 2369; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2370; AVX512DQNOBW-NEXT: movw $-9, %ax 2371; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2372; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2373; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2374; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2375; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2376; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2377; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1 2378; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2379; AVX512DQNOBW-NEXT: movw $-17, %ax 2380; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2381; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2382; AVX512DQNOBW-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2383; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2384; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2385; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2386; AVX512DQNOBW-NEXT: kshiftrw $11, 
%k1, %k1 2387; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2388; AVX512DQNOBW-NEXT: movw $-33, %ax 2389; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2390; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2391; AVX512DQNOBW-NEXT: kmovw %k1, %k3 2392; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2393; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2394; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2395; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2396; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1 2397; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2398; AVX512DQNOBW-NEXT: movw $-65, %ax 2399; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2400; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2401; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2402; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2403; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2404; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2405; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1 2406; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2407; AVX512DQNOBW-NEXT: movw $-129, %ax 2408; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2409; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2410; AVX512DQNOBW-NEXT: kmovw %k1, %k4 2411; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2412; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2413; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2414; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2415; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1 2416; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2417; AVX512DQNOBW-NEXT: movw $-257, %ax # imm = 0xFEFF 2418; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2419; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2420; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2421; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2422; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2423; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2424; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1 2425; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2426; AVX512DQNOBW-NEXT: movw $-513, %ax # imm = 0xFDFF 2427; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2428; 
AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2429; AVX512DQNOBW-NEXT: kmovw %k1, %k5 2430; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2431; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2432; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2433; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2434; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1 2435; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2436; AVX512DQNOBW-NEXT: movw $-1025, %ax # imm = 0xFBFF 2437; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2438; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2439; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2440; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2441; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2442; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2443; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1 2444; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2445; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF 2446; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2447; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2448; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2449; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2450; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2451; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2452; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1 2453; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2454; AVX512DQNOBW-NEXT: movw $-4097, %ax # imm = 0xEFFF 2455; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2456; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2457; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2458; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2459; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2460; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2461; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1 2462; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2463; AVX512DQNOBW-NEXT: movw $-8193, %ax # imm = 0xDFFF 2464; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2465; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2466; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2467; AVX512DQNOBW-NEXT: movb 
{{[0-9]+}}(%rsp), %al 2468; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2469; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 2470; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1 2471; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0 2472; AVX512DQNOBW-NEXT: movw $-16385, %ax # imm = 0xBFFF 2473; AVX512DQNOBW-NEXT: kmovw %eax, %k1 2474; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2475; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2476; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2477; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2478; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 2479; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2480; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 2481; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 2482; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2483; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2484; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2485; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2486; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2487; AVX512DQNOBW-NEXT: kmovw %edi, %k0 2488; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2489; AVX512DQNOBW-NEXT: kmovw %esi, %k7 2490; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2491; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7 2492; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2493; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2494; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2495; AVX512DQNOBW-NEXT: kmovw %edx, %k7 2496; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2497; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 2498; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2499; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2500; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2501; AVX512DQNOBW-NEXT: kmovw %ecx, %k7 2502; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2503; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 2504; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2505; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2506; AVX512DQNOBW-NEXT: kmovw %r8d, %k7 2507; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2508; AVX512DQNOBW-NEXT: 
kshiftrw $11, %k7, %k7 2509; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2510; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2511; AVX512DQNOBW-NEXT: kmovw %r9d, %k7 2512; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2513; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 2514; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2515; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload 2516; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2517; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2518; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2519; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2520; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 2521; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2522; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 2523; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2524; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2525; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2526; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 2527; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2528; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2529; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2530; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2531; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2532; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2533; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 2534; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2535; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2536; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2537; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2538; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2539; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 2540; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2541; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload 2542; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 2543; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2544; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2545; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2546; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7 2547; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2548; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2549; 
AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2550; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2551; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2552; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2553; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7 2554; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2555; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2556; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2557; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2558; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2559; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2560; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7 2561; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2562; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2563; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2564; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2565; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2566; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2567; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7 2568; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2569; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2570; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2571; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2572; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2573; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 2574; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2575; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 2576; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 2577; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2578; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2579; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2580; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2581; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill 2582; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2583; AVX512DQNOBW-NEXT: kmovw %eax, %k0 2584; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload 2585; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 2586; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2587; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2588; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 
2589; AVX512DQNOBW-NEXT: kshiftrw $14, %k7, %k7 2590; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2591; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2592; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2593; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2594; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2595; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 2596; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2597; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2598; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2599; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2600; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2601; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 2602; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2603; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2604; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 2605; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2606; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2607; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2608; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 2609; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2610; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2611; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2612; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2613; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2614; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2615; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 2616; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2617; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 2618; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2619; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2620; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2621; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 2622; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2623; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2624; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2625; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2626; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2627; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2628; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 2629; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2630; 
AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2631; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2632; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2633; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2634; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 2635; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2636; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload 2637; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 2638; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2639; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2640; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2641; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 2642; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2643; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 2644; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2645; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2646; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2647; AVX512DQNOBW-NEXT: kshiftrw $5, %k7, %k7 2648; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2649; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2650; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2651; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2652; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2653; AVX512DQNOBW-NEXT: kshiftrw $4, %k7, %k7 2654; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2655; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload 2656; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 2657; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2658; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2659; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2660; AVX512DQNOBW-NEXT: kshiftrw $3, %k7, %k7 2661; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2662; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2663; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 2664; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2665; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2666; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2667; AVX512DQNOBW-NEXT: kshiftrw $2, %k7, %k7 2668; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2669; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2670; AVX512DQNOBW-NEXT: kandw %k5, 
%k0, %k0 2671; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2672; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2673; AVX512DQNOBW-NEXT: kshiftlw $14, %k7, %k7 2674; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2675; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 2676; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 2677; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2678; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2679; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2680; AVX512DQNOBW-NEXT: korw %k7, %k0, %k0 2681; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2682; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2683; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2684; AVX512DQNOBW-NEXT: kandw %k5, %k7, %k7 2685; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2686; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2687; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2688; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6 2689; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6 2690; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2691; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2692; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2693; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2694; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2695; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 2696; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2697; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload 2698; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 2699; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2700; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2701; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2702; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 2703; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2704; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 2705; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2706; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2707; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2708; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 2709; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2710; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2711; 
AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 2712; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2713; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2714; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2715; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 2716; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2717; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2718; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 2719; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2720; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2721; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2722; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 2723; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2724; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2725; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 2726; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2727; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2728; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2729; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 2730; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2731; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2732; AVX512DQNOBW-NEXT: kandw %k1, %k6, %k6 2733; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2734; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2735; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2736; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 2737; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2738; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6 2739; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2740; AVX512DQNOBW-NEXT: kmovw %eax, %k7 2741; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 2742; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 2743; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 2744; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5 2745; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2746; AVX512DQNOBW-NEXT: kmovw %eax, %k6 2747; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 2748; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 2749; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5 2750; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2751; AVX512DQNOBW-NEXT: kandw 
%k1, %k5, %k4 2752; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2753; AVX512DQNOBW-NEXT: kmovw %eax, %k5 2754; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5 2755; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5 2756; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4 2757; AVX512DQNOBW-NEXT: kandw %k2, %k4, %k3 2758; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2759; AVX512DQNOBW-NEXT: kmovw %eax, %k4 2760; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4 2761; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4 2762; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3 2763; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2764; AVX512DQNOBW-NEXT: kandw %k1, %k3, %k2 2765; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2766; AVX512DQNOBW-NEXT: kmovw %eax, %k3 2767; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3 2768; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3 2769; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 2770; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload 2771; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1 2772; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2773; AVX512DQNOBW-NEXT: kmovw %eax, %k2 2774; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 2775; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 2776; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 2777; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 2778; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al 2779; AVX512DQNOBW-NEXT: kmovw %eax, %k2 2780; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 2781; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 2782; AVX512DQNOBW-NEXT: vpmovm2d %k1, %zmm2 2783; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm3 2784; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2785; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm4 2786; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload 2787; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm5 2788; AVX512DQNOBW-NEXT: vpmovdw %zmm2, %ymm2 2789; AVX512DQNOBW-NEXT: vpmovdw %zmm3, %ymm3 2790; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2791; AVX512DQNOBW-NEXT: vpandq %zmm1, %zmm2, 
%zmm1 2792; AVX512DQNOBW-NEXT: vpmovdw %zmm4, %ymm2 2793; AVX512DQNOBW-NEXT: vpmovdw %zmm5, %ymm3 2794; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 2795; AVX512DQNOBW-NEXT: vpandq %zmm0, %zmm2, %zmm0 2796; AVX512DQNOBW-NEXT: retq 2797 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer 2798 ret <64 x i16> %ret 2799} 2800 2801define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { 2802; ALL-LABEL: shuffle_zext_16x8_to_16x16: 2803; ALL: # %bb.0: 2804; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2805; ALL-NEXT: retq 2806 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 2807 %2 = bitcast <32 x i8> %1 to <16 x i16> 2808 ret <16 x i16> %2 2809} 2810 2811define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { 2812; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask: 2813; KNL: # %bb.0: 2814; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 2815; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2816; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 2817; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 2818; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 
2819; KNL-NEXT: retq 2820; 2821; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: 2822; SKX: # %bb.0: 2823; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 2824; SKX-NEXT: vpmovb2m %xmm1, %k1 2825; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2826; SKX-NEXT: retq 2827; 2828; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask: 2829; AVX512DQNOBW: # %bb.0: 2830; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 2831; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2832; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1 2833; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1 2834; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0 2835; AVX512DQNOBW-NEXT: retq 2836 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 2837 %bc = bitcast <32 x i8> %x to <16 x i16> 2838 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer 2839 ret <16 x i16> %ret 2840} 2841 2842define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { 2843; ALL-LABEL: zext_32x8_to_16x16: 2844; ALL: # %bb.0: 2845; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2846; ALL-NEXT: retq 2847 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> 2848 %2 = bitcast <32 x i8> %1 to <16 x i16> 2849 ret <16 x i16> %2 2850}

; Zero-extension of bytes to dwords written as a shuffle: bytes 0-7 of %a are
; each followed by three bytes taken from the zero vector (index 32), and the
; result is reinterpreted as <8 x i32>. Every RUN configuration is expected to
; select a single vpmovzxbd.
define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
; ALL-LABEL: zext_32x8_to_8x32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; ALL-NEXT:    retq
  %spread = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
  %wide = bitcast <32 x i8> %spread to <8 x i32>
  ret <8 x i32> %wide
}

; Zero-extension of bytes to qwords written as a shuffle: bytes 0-3 of %a are
; each followed by seven zero bytes and the result is reinterpreted as
; <4 x i64>. Every RUN configuration is expected to select a single vpmovzxbq.
define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
; ALL-LABEL: zext_32x8_to_4x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; ALL-NEXT:    retq
  %spread = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  %wide = bitcast <32 x i8> %spread to <4 x i64>
  ret <4 x i64> %wide
}

; Zero-extension of words to dwords written as a shuffle: words 0-7 of %a are
; interleaved with zero words (index 16) and the result is reinterpreted as
; <8 x i32>. Every RUN configuration is expected to select a single vpmovzxwd.
define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
; ALL-LABEL: zext_16x16_to_8x32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT:    retq
  %spread = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
  %wide = bitcast <16 x i16> %spread to <8 x i32>
  ret <8 x i32> %wide
}

; Zero-extension of words to qwords written as a shuffle: words 0-3 of %a are
; each followed by three zero words and the result is reinterpreted as
; <4 x i64>. Every RUN configuration is expected to select a single vpmovzxwq.
define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
; ALL-LABEL: zext_16x16_to_4x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; ALL-NEXT:    retq
  %spread = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
  %wide = bitcast <16 x i16> %spread to <4 x i64>
  ret <4 x i64> %wide
}

; Zero-extension of dwords to qwords written as a shuffle: dwords 0-3 of %a
; are interleaved with zero dwords (index 8) and the result is reinterpreted
; as <4 x i64>. Every RUN configuration is expected to select a single
; vpmovzxdq.
define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
; ALL-LABEL: zext_8x32_to_4x64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; ALL-NEXT:    retq
  %spread = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
  %wide = bitcast <8 x i32> %spread to <4 x i64>
  ret <4 x i64> %wide
}

; Byte-wise equality compare whose <64 x i1> result is zero-extended back to
; bytes. The +avx512bw run expects a 512-bit compare into a mask register
; followed by a zero-masked vmovdqu8 from a RIP-relative operand; the runs
; without +avx512bw expect two 256-bit compares, a re-insert into zmm and a
; vpandq with a RIP-relative operand.
define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; KNL-LABEL: zext_64xi1_to_64xi8:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_64xi1_to_64xi8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQNOBW-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
; AVX512DQNOBW-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %eq = icmp eq <64 x i8> %x, %y
  %ext = zext <64 x i1> %eq to <64 x i8>
  ret <64 x i8> %ext
}

; Word-wise equality compare whose <32 x i1> result is zero-extended back to
; words. The +avx512bw run expects a compare into a mask register, vpmovm2w
; and a logical right shift by 15; the runs without +avx512bw expect two
; 256-bit compares, a re-insert into zmm and a vpandq with a RIP-relative
; operand.
define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; KNL-LABEL: zext_32xi1_to_32xi16:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32xi1_to_32xi16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
; SKX-NEXT:    vpmovm2w %k0, %zmm0
; SKX-NEXT:    vpsrlw $15, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQNOBW-NEXT:    retq
  %eq = icmp eq <32 x i16> %x, %y
  %ext = zext <32 x i1> %eq to <32 x i16>
  ret <32 x i16> %ext
}

; 256-bit variant of the word compare + zero-extend. Every RUN configuration
; is expected to emit a vector vpcmpeqw followed by a logical right shift by
; 15, with no mask register involved.
define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
; ALL-LABEL: zext_16xi1_to_16xi16:
; ALL:       # %bb.0:
; ALL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; ALL-NEXT:    vpsrlw $15, %ymm0, %ymm0
; ALL-NEXT:    retq
  %eq = icmp eq <16 x i16> %x, %y
  %ext = zext <16 x i1> %eq to <16 x i16>
  ret <16 x i16> %ext
}


; Word-wise equality compare whose <32 x i1> result is zero-extended to bytes,
; so the result element is narrower than the compared element. The +avx512bw
; run expects a compare into a mask register and a zero-masked vmovdqu8; the
; runs without +avx512bw expect each 256-bit compare result to be widened with
; vpmovzxwd, truncated with vpmovdb, recombined with vinserti128 and ANDed
; with a RIP-relative operand.
define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
; KNL-LABEL: zext_32xi1_to_32xi8:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; KNL-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
; KNL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_32xi1_to_32xi8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
; SKX-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQNOBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
; AVX512DQNOBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQNOBW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512DQNOBW-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQNOBW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQNOBW-NEXT:    retq
  %eq = icmp eq <32 x i16> %x, %y
  %ext = zext <32 x i1> %eq to <32 x i8>
  ret <32 x i8> %ext
}

; Byte-wise equality compare on <4 x i8> whose <4 x i1> result is
; zero-extended to dwords. The +avx512bw run expects a compare into a mask
; register, vpmovm2d and a logical right shift by 31. The other two runs
; expect a vector compare widened with vpmovzxbd and then ANDed with 1 per
; lane; they differ only in how that constant is supplied (vpbroadcastd into
; a register versus an embedded {1to4} broadcast operand).
define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
; KNL-LABEL: zext_4xi1_to_4x32:
; KNL:       # %bb.0:
; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; KNL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; KNL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_4xi1_to_4x32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    vpsrld $31, %xmm0, %xmm0
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_4xi1_to_4x32:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512DQNOBW-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    retq
  %eq = icmp eq <4 x i8> %x, %y
  %ext = zext <4 x i1> %eq to <4 x i32>
  ret <4 x i32> %ext
}
; Byte-wise equality compare on <2 x i8> whose <2 x i1> result is
; zero-extended to qwords. The +avx512bw run expects a compare into a mask
; register, vpmovm2q and a logical right shift by 63; the runs without
; +avx512bw expect a vector compare widened with vpmovzxbq and then ANDed with
; a RIP-relative operand.
define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
; KNL-LABEL: zext_2xi1_to_2xi64:
; KNL:       # %bb.0:
; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_2xi1_to_2xi64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; SKX-NEXT:    retq
;
; AVX512DQNOBW-LABEL: zext_2xi1_to_2xi64:
; AVX512DQNOBW:       # %bb.0:
; AVX512DQNOBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQNOBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQNOBW-NEXT:    retq
  %eq = icmp eq <2 x i8> %x, %y
  %ext = zext <2 x i1> %eq to <2 x i64>
  ret <2 x i64> %ext
}