1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW 10 11define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 12; SSE2-LABEL: zext_16i8_to_8i16: 13; SSE2: # %bb.0: # %entry 14; SSE2-NEXT: pxor %xmm1, %xmm1 15; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 16; SSE2-NEXT: retq 17; 18; SSSE3-LABEL: zext_16i8_to_8i16: 19; SSSE3: # %bb.0: # %entry 20; SSSE3-NEXT: pxor %xmm1, %xmm1 21; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 22; SSSE3-NEXT: retq 23; 24; SSE41-LABEL: zext_16i8_to_8i16: 25; SSE41: # %bb.0: # %entry 26; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 27; SSE41-NEXT: retq 28; 29; AVX-LABEL: zext_16i8_to_8i16: 30; AVX: # %bb.0: # %entry 31; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 32; AVX-NEXT: retq 33entry: 34 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 35 %C = zext <8 x i8> %B to <8 x i16> 36 ret <8 x i16> %C 37} 38 39; PR17654 40define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) { 41; SSE2-LABEL: zext_16i8_to_16i16: 42; SSE2: # %bb.0: # %entry 43; SSE2-NEXT: movdqa %xmm0, %xmm1 44; SSE2-NEXT: pxor %xmm2, %xmm2 45; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 46; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 47; SSE2-NEXT: retq 48; 49; SSSE3-LABEL: zext_16i8_to_16i16: 50; SSSE3: # %bb.0: # %entry 51; SSSE3-NEXT: movdqa %xmm0, %xmm1 52; SSSE3-NEXT: pxor %xmm2, %xmm2 53; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 54; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 55; SSSE3-NEXT: retq 56; 57; SSE41-LABEL: zext_16i8_to_16i16: 58; SSE41: # %bb.0: # %entry 59; SSE41-NEXT: movdqa %xmm0, %xmm1 60; SSE41-NEXT: pxor %xmm2, %xmm2 61; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 62; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 63; SSE41-NEXT: retq 64; 65; AVX1-LABEL: zext_16i8_to_16i16: 66; AVX1: # %bb.0: # %entry 67; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 68; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 69; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 70; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 71; AVX1-NEXT: retq 72; 73; AVX2-LABEL: zext_16i8_to_16i16: 74; AVX2: # %bb.0: # %entry 75; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 76; AVX2-NEXT: retq 77; 78; AVX512-LABEL: zext_16i8_to_16i16: 79; AVX512: # %bb.0: # %entry 80; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 81; AVX512-NEXT: retq 82entry: 83 %B = zext <16 x i8> %A to <16 x i16> 84 ret <16 x i16> %B 85} 86 87define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) { 88; SSE2-LABEL: zext_32i8_to_32i16: 89; SSE2: # %bb.0: # %entry 90; SSE2-NEXT: movdqa %xmm1, %xmm3 91; SSE2-NEXT: movdqa %xmm0, %xmm1 92; SSE2-NEXT: pxor %xmm4, %xmm4 93; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 94; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 95; SSE2-NEXT: movdqa %xmm3, %xmm2 96; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 97; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 98; SSE2-NEXT: retq 99; 100; SSSE3-LABEL: zext_32i8_to_32i16: 101; SSSE3: # %bb.0: # %entry 102; SSSE3-NEXT: movdqa %xmm1, %xmm3 103; SSSE3-NEXT: movdqa %xmm0, %xmm1 104; SSSE3-NEXT: pxor %xmm4, %xmm4 105; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 106; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 107; SSSE3-NEXT: movdqa %xmm3, %xmm2 108; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 109; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 110; SSSE3-NEXT: retq 111; 112; SSE41-LABEL: zext_32i8_to_32i16: 113; SSE41: # %bb.0: # %entry 114; SSE41-NEXT: movdqa %xmm1, %xmm3 115; SSE41-NEXT: movdqa %xmm0, %xmm1 116; SSE41-NEXT: pxor %xmm4, %xmm4 117; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 118; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 119; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 120; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 121; SSE41-NEXT: retq 122; 123; AVX1-LABEL: zext_32i8_to_32i16: 124; AVX1: # %bb.0: # %entry 125; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 126; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 127; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 128; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 129; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 130; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 131; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 132; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 133; AVX1-NEXT: vmovaps %ymm2, %ymm0 134; AVX1-NEXT: retq 135; 136; AVX2-LABEL: zext_32i8_to_32i16: 137; AVX2: # %bb.0: # %entry 138; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 139; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 140; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 141; AVX2-NEXT: vmovdqa %ymm2, %ymm0 142; AVX2-NEXT: retq 143; 144; AVX512F-LABEL: zext_32i8_to_32i16: 145; AVX512F: # %bb.0: # %entry 146; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 147; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 148; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 149; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 150; AVX512F-NEXT: retq 151; 152; AVX512BW-LABEL: zext_32i8_to_32i16: 153; AVX512BW: # %bb.0: # %entry 154; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 155; AVX512BW-NEXT: retq 156entry: 157 %B = zext <32 x i8> %A to <32 x i16> 158 ret <32 x i16> %B 159} 160 161define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 162; SSE2-LABEL: zext_16i8_to_4i32: 163; SSE2: # %bb.0: # %entry 164; SSE2-NEXT: pxor %xmm1, %xmm1 165; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 166; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 167; SSE2-NEXT: retq 168; 169; SSSE3-LABEL: zext_16i8_to_4i32: 170; SSSE3: # %bb.0: # %entry 171; SSSE3-NEXT: pxor %xmm1, %xmm1 172; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 173; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 174; SSSE3-NEXT: retq 175; 176; SSE41-LABEL: zext_16i8_to_4i32: 177; SSE41: # %bb.0: # %entry 178; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 179; SSE41-NEXT: retq 180; 181; AVX-LABEL: zext_16i8_to_4i32: 182; AVX: # %bb.0: # %entry 183; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 184; AVX-NEXT: retq 185entry: 186 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 187 %C = zext <4 x i8> %B to <4 x i32> 188 ret <4 x i32> %C 189} 190 191define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 192; SSE2-LABEL: zext_16i8_to_8i32: 193; SSE2: # %bb.0: # %entry 194; SSE2-NEXT: movdqa %xmm0, %xmm1 195; SSE2-NEXT: pxor %xmm2, %xmm2 196; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 197; SSE2-NEXT: movdqa %xmm1, %xmm0 198; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 199; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 200; SSE2-NEXT: retq 201; 202; SSSE3-LABEL: zext_16i8_to_8i32: 203; SSSE3: # 
%bb.0: # %entry 204; SSSE3-NEXT: movdqa %xmm0, %xmm1 205; SSSE3-NEXT: pxor %xmm2, %xmm2 206; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 207; SSSE3-NEXT: movdqa %xmm1, %xmm0 208; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 209; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 210; SSSE3-NEXT: retq 211; 212; SSE41-LABEL: zext_16i8_to_8i32: 213; SSE41: # %bb.0: # %entry 214; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 215; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 216; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 217; SSE41-NEXT: movdqa %xmm2, %xmm0 218; SSE41-NEXT: retq 219; 220; AVX1-LABEL: zext_16i8_to_8i32: 221; AVX1: # %bb.0: # %entry 222; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 223; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 224; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 225; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 226; AVX1-NEXT: retq 227; 228; AVX2-LABEL: zext_16i8_to_8i32: 229; AVX2: # %bb.0: # %entry 230; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 231; AVX2-NEXT: retq 232; 233; AVX512-LABEL: zext_16i8_to_8i32: 234; AVX512: # %bb.0: # %entry 235; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 236; AVX512-NEXT: retq 237entry: 238 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 239 %C = zext <8 x i8> %B to <8 x i32> 240 ret <8 x i32> %C 241} 242 243define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 244; SSE2-LABEL: zext_16i8_to_16i32: 245; SSE2: # %bb.0: # %entry 246; SSE2-NEXT: movdqa %xmm0, %xmm3 247; SSE2-NEXT: pxor %xmm4, %xmm4 248; SSE2-NEXT: movdqa %xmm0, %xmm1 249; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 250; SSE2-NEXT: movdqa %xmm1, %xmm0 251; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 252; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 253; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 254; SSE2-NEXT: movdqa %xmm3, %xmm2 255; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 256; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 257; SSE2-NEXT: retq 258; 259; SSSE3-LABEL: zext_16i8_to_16i32: 260; SSSE3: # %bb.0: # %entry 261; SSSE3-NEXT: movdqa %xmm0, %xmm3 262; SSSE3-NEXT: pxor %xmm4, %xmm4 263; SSSE3-NEXT: movdqa %xmm0, %xmm1 264; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 265; SSSE3-NEXT: movdqa %xmm1, %xmm0 266; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 267; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 268; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 269; SSSE3-NEXT: movdqa %xmm3, %xmm2 270; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 271; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 272; SSSE3-NEXT: retq 273; 274; SSE41-LABEL: zext_16i8_to_16i32: 275; SSE41: # %bb.0: # %entry 276; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 277; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 278; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 279; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 280; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 281; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 282; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 283; SSE41-NEXT: movdqa %xmm4, %xmm0 284; SSE41-NEXT: retq 285; 286; AVX1-LABEL: zext_16i8_to_16i32: 287; AVX1: # %bb.0: # %entry 288; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 289; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 290; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 291; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 292; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 293; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 294; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 295; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 296; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 297; AVX1-NEXT: vmovaps %ymm2, %ymm0 298; AVX1-NEXT: retq 299; 300; AVX2-LABEL: zext_16i8_to_16i32: 301; AVX2: # %bb.0: # %entry 302; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 303; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 304; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 305; AVX2-NEXT: vmovdqa %ymm2, %ymm0 306; AVX2-NEXT: retq 307; 308; AVX512-LABEL: zext_16i8_to_16i32: 309; AVX512: # %bb.0: # %entry 310; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 311; AVX512-NEXT: retq 312entry: 313 %B = zext <16 x i8> %A to <16 x i32> 314 ret <16 x i32> %B 315} 316 317define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 318; SSE2-LABEL: zext_16i8_to_2i64: 319; SSE2: # %bb.0: # %entry 320; SSE2-NEXT: pxor %xmm1, %xmm1 321; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 322; SSE2-NEXT: 
punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 323; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 324; SSE2-NEXT: retq 325; 326; SSSE3-LABEL: zext_16i8_to_2i64: 327; SSSE3: # %bb.0: # %entry 328; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 329; SSSE3-NEXT: retq 330; 331; SSE41-LABEL: zext_16i8_to_2i64: 332; SSE41: # %bb.0: # %entry 333; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 334; SSE41-NEXT: retq 335; 336; AVX-LABEL: zext_16i8_to_2i64: 337; AVX: # %bb.0: # %entry 338; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 339; AVX-NEXT: retq 340entry: 341 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 342 %C = zext <2 x i8> %B to <2 x i64> 343 ret <2 x i64> %C 344} 345 346define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 347; SSE2-LABEL: zext_16i8_to_4i64: 348; SSE2: # %bb.0: # %entry 349; SSE2-NEXT: movdqa %xmm0, %xmm1 350; SSE2-NEXT: pxor %xmm2, %xmm2 351; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 352; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 353; SSE2-NEXT: movdqa %xmm1, %xmm0 354; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 355; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 356; SSE2-NEXT: retq 357; 358; SSSE3-LABEL: zext_16i8_to_4i64: 359; SSSE3: # %bb.0: # %entry 360; SSSE3-NEXT: movdqa %xmm0, %xmm1 361; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 362; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = 
xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 363; SSSE3-NEXT: retq 364; 365; SSE41-LABEL: zext_16i8_to_4i64: 366; SSE41: # %bb.0: # %entry 367; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 368; SSE41-NEXT: psrld $16, %xmm0 369; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 370; SSE41-NEXT: movdqa %xmm2, %xmm0 371; SSE41-NEXT: retq 372; 373; AVX1-LABEL: zext_16i8_to_4i64: 374; AVX1: # %bb.0: # %entry 375; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 376; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 377; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 378; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 379; AVX1-NEXT: retq 380; 381; AVX2-LABEL: zext_16i8_to_4i64: 382; AVX2: # %bb.0: # %entry 383; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 384; AVX2-NEXT: retq 385; 386; AVX512-LABEL: zext_16i8_to_4i64: 387; AVX512: # %bb.0: # %entry 388; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 389; AVX512-NEXT: retq 390entry: 391 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 392 %C = zext <4 x i8> %B to <4 x i64> 393 ret <4 x i64> %C 394} 395 396define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 397; SSE2-LABEL: zext_16i8_to_8i64: 398; SSE2: # %bb.0: # %entry 399; SSE2-NEXT: movdqa %xmm0, %xmm3 400; SSE2-NEXT: pxor %xmm4, %xmm4 401; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 402; SSE2-NEXT: movdqa %xmm3, %xmm1 403; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 404; SSE2-NEXT: movdqa %xmm1, %xmm0 405; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 406; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 407; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 408; SSE2-NEXT: movdqa %xmm3, %xmm2 409; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 410; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 411; SSE2-NEXT: retq 412; 413; SSSE3-LABEL: zext_16i8_to_8i64: 414; SSSE3: # %bb.0: # %entry 415; SSSE3-NEXT: movdqa %xmm0, %xmm3 416; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 417; SSSE3-NEXT: movdqa %xmm3, %xmm1 418; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 419; SSSE3-NEXT: movdqa %xmm3, %xmm2 420; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero 421; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero 422; SSSE3-NEXT: retq 423; 424; SSE41-LABEL: zext_16i8_to_8i64: 425; SSE41: # %bb.0: # %entry 426; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 427; SSE41-NEXT: movdqa %xmm0, %xmm1 428; SSE41-NEXT: psrld $16, %xmm1 429; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 430; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 431; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = 
xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero 432; SSE41-NEXT: psrlq $48, %xmm0 433; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 434; SSE41-NEXT: movdqa %xmm4, %xmm0 435; SSE41-NEXT: retq 436; 437; AVX1-LABEL: zext_16i8_to_8i64: 438; AVX1: # %bb.0: # %entry 439; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 440; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 441; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero 442; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 443; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 444; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 445; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 446; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 447; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 448; AVX1-NEXT: vmovaps %ymm2, %ymm0 449; AVX1-NEXT: retq 450; 451; AVX2-LABEL: zext_16i8_to_8i64: 452; AVX2: # %bb.0: # %entry 453; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 454; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 455; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 456; AVX2-NEXT: vmovdqa %ymm2, %ymm0 457; AVX2-NEXT: retq 458; 459; AVX512-LABEL: zext_16i8_to_8i64: 460; AVX512: # %bb.0: # %entry 461; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 462; AVX512-NEXT: retq 463entry: 464 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 465 %C = zext <8 x i8> %B to <8 x i64> 466 ret <8 x i64> %C 467} 468 469define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 470; SSE2-LABEL: zext_8i16_to_4i32: 471; SSE2: # %bb.0: # %entry 472; SSE2-NEXT: pxor %xmm1, %xmm1 473; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 474; SSE2-NEXT: retq 475; 476; SSSE3-LABEL: zext_8i16_to_4i32: 477; SSSE3: # %bb.0: # %entry 478; SSSE3-NEXT: pxor %xmm1, %xmm1 479; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 480; SSSE3-NEXT: retq 481; 482; SSE41-LABEL: zext_8i16_to_4i32: 483; SSE41: # %bb.0: # %entry 484; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 485; SSE41-NEXT: retq 486; 487; AVX-LABEL: zext_8i16_to_4i32: 488; AVX: # %bb.0: # %entry 489; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 490; AVX-NEXT: retq 491entry: 492 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 493 %C = zext <4 x i16> %B to <4 x i32> 494 ret <4 x i32> %C 495} 496 497define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 498; SSE2-LABEL: zext_8i16_to_8i32: 499; SSE2: # %bb.0: # %entry 500; SSE2-NEXT: movdqa %xmm0, %xmm1 501; SSE2-NEXT: pxor %xmm2, %xmm2 502; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 503; SSE2-NEXT: 
punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 504; SSE2-NEXT: retq 505; 506; SSSE3-LABEL: zext_8i16_to_8i32: 507; SSSE3: # %bb.0: # %entry 508; SSSE3-NEXT: movdqa %xmm0, %xmm1 509; SSSE3-NEXT: pxor %xmm2, %xmm2 510; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 511; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 512; SSSE3-NEXT: retq 513; 514; SSE41-LABEL: zext_8i16_to_8i32: 515; SSE41: # %bb.0: # %entry 516; SSE41-NEXT: movdqa %xmm0, %xmm1 517; SSE41-NEXT: pxor %xmm2, %xmm2 518; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 519; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 520; SSE41-NEXT: retq 521; 522; AVX1-LABEL: zext_8i16_to_8i32: 523; AVX1: # %bb.0: # %entry 524; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 525; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 526; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 527; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 528; AVX1-NEXT: retq 529; 530; AVX2-LABEL: zext_8i16_to_8i32: 531; AVX2: # %bb.0: # %entry 532; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 533; AVX2-NEXT: retq 534; 535; AVX512-LABEL: zext_8i16_to_8i32: 536; AVX512: # %bb.0: # %entry 537; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 538; AVX512-NEXT: retq 539entry: 540 %B = zext <8 x i16> %A to <8 x i32> 541 ret <8 x i32>%B 542} 543 544define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 545; SSE2-LABEL: zext_16i16_to_16i32: 546; SSE2: # %bb.0: # %entry 547; SSE2-NEXT: movdqa %xmm1, %xmm3 
548; SSE2-NEXT: movdqa %xmm0, %xmm1 549; SSE2-NEXT: pxor %xmm4, %xmm4 550; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 551; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 552; SSE2-NEXT: movdqa %xmm3, %xmm2 553; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 554; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 555; SSE2-NEXT: retq 556; 557; SSSE3-LABEL: zext_16i16_to_16i32: 558; SSSE3: # %bb.0: # %entry 559; SSSE3-NEXT: movdqa %xmm1, %xmm3 560; SSSE3-NEXT: movdqa %xmm0, %xmm1 561; SSSE3-NEXT: pxor %xmm4, %xmm4 562; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 563; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 564; SSSE3-NEXT: movdqa %xmm3, %xmm2 565; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 566; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 567; SSSE3-NEXT: retq 568; 569; SSE41-LABEL: zext_16i16_to_16i32: 570; SSE41: # %bb.0: # %entry 571; SSE41-NEXT: movdqa %xmm1, %xmm3 572; SSE41-NEXT: movdqa %xmm0, %xmm1 573; SSE41-NEXT: pxor %xmm4, %xmm4 574; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 575; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 576; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero 577; SSE41-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 578; SSE41-NEXT: retq 579; 580; AVX1-LABEL: zext_16i16_to_16i32: 581; AVX1: # %bb.0: # %entry 582; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 583; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = 
xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 584; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 585; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 586; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 587; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 588; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 589; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 590; AVX1-NEXT: vmovaps %ymm2, %ymm0 591; AVX1-NEXT: retq 592; 593; AVX2-LABEL: zext_16i16_to_16i32: 594; AVX2: # %bb.0: # %entry 595; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 596; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 597; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 598; AVX2-NEXT: vmovdqa %ymm2, %ymm0 599; AVX2-NEXT: retq 600; 601; AVX512-LABEL: zext_16i16_to_16i32: 602; AVX512: # %bb.0: # %entry 603; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 604; AVX512-NEXT: retq 605entry: 606 %B = zext <16 x i16> %A to <16 x i32> 607 ret <16 x i32> %B 608} 609 610define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 611; SSE2-LABEL: zext_8i16_to_2i64: 612; SSE2: # %bb.0: # %entry 613; SSE2-NEXT: pxor %xmm1, %xmm1 614; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 615; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 616; SSE2-NEXT: retq 617; 618; SSSE3-LABEL: zext_8i16_to_2i64: 619; SSSE3: # %bb.0: # %entry 620; SSSE3-NEXT: pxor %xmm1, %xmm1 621; SSSE3-NEXT: punpcklwd 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 622; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 623; SSSE3-NEXT: retq 624; 625; SSE41-LABEL: zext_8i16_to_2i64: 626; SSE41: # %bb.0: # %entry 627; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 628; SSE41-NEXT: retq 629; 630; AVX-LABEL: zext_8i16_to_2i64: 631; AVX: # %bb.0: # %entry 632; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 633; AVX-NEXT: retq 634entry: 635 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 636 %C = zext <2 x i16> %B to <2 x i64> 637 ret <2 x i64> %C 638} 639 640define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 641; SSE2-LABEL: zext_8i16_to_4i64: 642; SSE2: # %bb.0: # %entry 643; SSE2-NEXT: movdqa %xmm0, %xmm1 644; SSE2-NEXT: pxor %xmm2, %xmm2 645; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 646; SSE2-NEXT: movdqa %xmm1, %xmm0 647; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 648; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 649; SSE2-NEXT: retq 650; 651; SSSE3-LABEL: zext_8i16_to_4i64: 652; SSSE3: # %bb.0: # %entry 653; SSSE3-NEXT: movdqa %xmm0, %xmm1 654; SSSE3-NEXT: pxor %xmm2, %xmm2 655; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 656; SSSE3-NEXT: movdqa %xmm1, %xmm0 657; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 658; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 659; SSSE3-NEXT: retq 660; 661; SSE41-LABEL: zext_8i16_to_4i64: 662; SSE41: # %bb.0: # %entry 663; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 664; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 665; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 666; 
SSE41-NEXT: movdqa %xmm2, %xmm0 667; SSE41-NEXT: retq 668; 669; AVX1-LABEL: zext_8i16_to_4i64: 670; AVX1: # %bb.0: # %entry 671; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 672; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 673; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 674; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 675; AVX1-NEXT: retq 676; 677; AVX2-LABEL: zext_8i16_to_4i64: 678; AVX2: # %bb.0: # %entry 679; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 680; AVX2-NEXT: retq 681; 682; AVX512-LABEL: zext_8i16_to_4i64: 683; AVX512: # %bb.0: # %entry 684; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 685; AVX512-NEXT: retq 686entry: 687 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 688 %C = zext <4 x i16> %B to <4 x i64> 689 ret <4 x i64> %C 690} 691 692define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 693; SSE2-LABEL: zext_8i16_to_8i64: 694; SSE2: # %bb.0: # %entry 695; SSE2-NEXT: movdqa %xmm0, %xmm3 696; SSE2-NEXT: pxor %xmm4, %xmm4 697; SSE2-NEXT: movdqa %xmm0, %xmm1 698; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 699; SSE2-NEXT: movdqa %xmm1, %xmm0 700; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 701; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 702; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 703; SSE2-NEXT: movdqa %xmm3, %xmm2 704; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 705; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 706; SSE2-NEXT: retq 707; 708; SSSE3-LABEL: zext_8i16_to_8i64: 709; SSSE3: # %bb.0: # %entry 710; 
SSSE3-NEXT: movdqa %xmm0, %xmm3 711; SSSE3-NEXT: pxor %xmm4, %xmm4 712; SSSE3-NEXT: movdqa %xmm0, %xmm1 713; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 714; SSSE3-NEXT: movdqa %xmm1, %xmm0 715; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 716; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 717; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 718; SSSE3-NEXT: movdqa %xmm3, %xmm2 719; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 720; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 721; SSSE3-NEXT: retq 722; 723; SSE41-LABEL: zext_8i16_to_8i64: 724; SSE41: # %bb.0: # %entry 725; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 726; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 727; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 728; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 729; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 730; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 731; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 732; SSE41-NEXT: movdqa %xmm4, %xmm0 733; SSE41-NEXT: retq 734; 735; AVX1-LABEL: zext_8i16_to_8i64: 736; AVX1: # %bb.0: # %entry 737; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 738; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 739; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 740; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 741; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 742; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 743; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 744; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 745; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 746; AVX1-NEXT: vmovaps %ymm2, %ymm0 747; AVX1-NEXT: retq 748; 749; AVX2-LABEL: zext_8i16_to_8i64: 750; AVX2: # %bb.0: # %entry 751; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 752; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 753; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 754; AVX2-NEXT: vmovdqa %ymm2, %ymm0 755; AVX2-NEXT: retq 756; 757; AVX512-LABEL: zext_8i16_to_8i64: 758; AVX512: # %bb.0: # %entry 759; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 760; AVX512-NEXT: retq 761entry: 762 %B = zext <8 x i16> %A to <8 x i64> 763 ret <8 x i64> %B 764} 765 766define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 767; SSE2-LABEL: zext_4i32_to_2i64: 768; SSE2: # %bb.0: # %entry 769; SSE2-NEXT: xorps %xmm1, %xmm1 770; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 771; SSE2-NEXT: retq 772; 773; SSSE3-LABEL: zext_4i32_to_2i64: 774; SSSE3: # %bb.0: # %entry 775; SSSE3-NEXT: xorps %xmm1, %xmm1 776; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 777; SSSE3-NEXT: retq 778; 779; SSE41-LABEL: zext_4i32_to_2i64: 780; SSE41: # %bb.0: # %entry 781; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 782; SSE41-NEXT: retq 783; 784; AVX-LABEL: zext_4i32_to_2i64: 785; AVX: # %bb.0: # %entry 786; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 787; AVX-NEXT: retq 788entry: 789 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 790 %C = zext <2 x i32> %B to <2 x i64> 791 ret <2 x i64> %C 792} 793 794define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone 
ssp { 795; SSE2-LABEL: zext_4i32_to_4i64: 796; SSE2: # %bb.0: # %entry 797; SSE2-NEXT: movaps %xmm0, %xmm1 798; SSE2-NEXT: xorps %xmm2, %xmm2 799; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 800; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 801; SSE2-NEXT: retq 802; 803; SSSE3-LABEL: zext_4i32_to_4i64: 804; SSSE3: # %bb.0: # %entry 805; SSSE3-NEXT: movaps %xmm0, %xmm1 806; SSSE3-NEXT: xorps %xmm2, %xmm2 807; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 808; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 809; SSSE3-NEXT: retq 810; 811; SSE41-LABEL: zext_4i32_to_4i64: 812; SSE41: # %bb.0: # %entry 813; SSE41-NEXT: movdqa %xmm0, %xmm1 814; SSE41-NEXT: pxor %xmm2, %xmm2 815; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 816; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 817; SSE41-NEXT: retq 818; 819; AVX1-LABEL: zext_4i32_to_4i64: 820; AVX1: # %bb.0: # %entry 821; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 822; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 823; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 824; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 825; AVX1-NEXT: retq 826; 827; AVX2-LABEL: zext_4i32_to_4i64: 828; AVX2: # %bb.0: # %entry 829; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 830; AVX2-NEXT: retq 831; 832; AVX512-LABEL: zext_4i32_to_4i64: 833; AVX512: # %bb.0: # %entry 834; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 835; AVX512-NEXT: retq 836entry: 837 %B = zext <4 x i32> %A to <4 x i64> 838 ret <4 x i64>%B 839} 840 841define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 842; SSE2-LABEL: zext_8i32_to_8i64: 843; SSE2: # %bb.0: # %entry 844; SSE2-NEXT: movaps %xmm1, %xmm3 845; SSE2-NEXT: movaps %xmm0, %xmm1 846; SSE2-NEXT: xorps %xmm4, %xmm4 847; SSE2-NEXT: unpcklps 
{{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 848; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 849; SSE2-NEXT: movaps %xmm3, %xmm2 850; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 851; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 852; SSE2-NEXT: retq 853; 854; SSSE3-LABEL: zext_8i32_to_8i64: 855; SSSE3: # %bb.0: # %entry 856; SSSE3-NEXT: movaps %xmm1, %xmm3 857; SSSE3-NEXT: movaps %xmm0, %xmm1 858; SSSE3-NEXT: xorps %xmm4, %xmm4 859; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 860; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 861; SSSE3-NEXT: movaps %xmm3, %xmm2 862; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 863; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 864; SSSE3-NEXT: retq 865; 866; SSE41-LABEL: zext_8i32_to_8i64: 867; SSE41: # %bb.0: # %entry 868; SSE41-NEXT: movdqa %xmm1, %xmm3 869; SSE41-NEXT: movdqa %xmm0, %xmm1 870; SSE41-NEXT: pxor %xmm4, %xmm4 871; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 872; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 873; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero 874; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 875; SSE41-NEXT: retq 876; 877; AVX1-LABEL: zext_8i32_to_8i64: 878; AVX1: # %bb.0: # %entry 879; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 880; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 881; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero 882; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 883; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 884; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 885; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 886; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 887; AVX1-NEXT: vmovaps %ymm2, %ymm0 888; AVX1-NEXT: retq 889; 890; AVX2-LABEL: zext_8i32_to_8i64: 
891; AVX2: # %bb.0: # %entry 892; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 893; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 894; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 895; AVX2-NEXT: vmovdqa %ymm2, %ymm0 896; AVX2-NEXT: retq 897; 898; AVX512-LABEL: zext_8i32_to_8i64: 899; AVX512: # %bb.0: # %entry 900; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 901; AVX512-NEXT: retq 902entry: 903 %B = zext <8 x i32> %A to <8 x i64> 904 ret <8 x i64>%B 905} 906 907define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) { 908; SSE2-LABEL: load_zext_2i8_to_2i64: 909; SSE2: # %bb.0: # %entry 910; SSE2-NEXT: movzwl (%rdi), %eax 911; SSE2-NEXT: movd %eax, %xmm0 912; SSE2-NEXT: pxor %xmm1, %xmm1 913; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 914; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 915; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 916; SSE2-NEXT: retq 917; 918; SSSE3-LABEL: load_zext_2i8_to_2i64: 919; SSSE3: # %bb.0: # %entry 920; SSSE3-NEXT: movzwl (%rdi), %eax 921; SSSE3-NEXT: movd %eax, %xmm0 922; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 923; SSSE3-NEXT: retq 924; 925; SSE41-LABEL: load_zext_2i8_to_2i64: 926; SSE41: # %bb.0: # %entry 927; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 928; SSE41-NEXT: retq 929; 930; AVX-LABEL: load_zext_2i8_to_2i64: 931; AVX: # %bb.0: # %entry 932; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 933; AVX-NEXT: retq 934entry: 935 
%X = load <2 x i8>, <2 x i8>* %ptr 936 %Y = zext <2 x i8> %X to <2 x i64> 937 ret <2 x i64> %Y 938} 939 940define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) { 941; SSE2-LABEL: load_zext_4i8_to_4i32: 942; SSE2: # %bb.0: # %entry 943; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 944; SSE2-NEXT: pxor %xmm1, %xmm1 945; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 946; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 947; SSE2-NEXT: retq 948; 949; SSSE3-LABEL: load_zext_4i8_to_4i32: 950; SSSE3: # %bb.0: # %entry 951; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 952; SSSE3-NEXT: pxor %xmm1, %xmm1 953; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 954; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 955; SSSE3-NEXT: retq 956; 957; SSE41-LABEL: load_zext_4i8_to_4i32: 958; SSE41: # %bb.0: # %entry 959; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 960; SSE41-NEXT: retq 961; 962; AVX-LABEL: load_zext_4i8_to_4i32: 963; AVX: # %bb.0: # %entry 964; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 965; AVX-NEXT: retq 966entry: 967 %X = load <4 x i8>, <4 x i8>* %ptr 968 %Y = zext <4 x i8> %X to <4 x i32> 969 ret <4 x i32> %Y 970} 971 972define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) { 973; SSE2-LABEL: load_zext_4i8_to_4i64: 974; SSE2: # %bb.0: # %entry 975; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 976; SSE2-NEXT: pxor %xmm2, %xmm2 977; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 978; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 979; SSE2-NEXT: movdqa %xmm1, %xmm0 980; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 981; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 982; SSE2-NEXT: retq 983; 984; SSSE3-LABEL: load_zext_4i8_to_4i64: 985; SSSE3: # %bb.0: # %entry 986; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 987; SSSE3-NEXT: movdqa %xmm1, %xmm0 988; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 989; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 990; SSSE3-NEXT: retq 991; 992; SSE41-LABEL: load_zext_4i8_to_4i64: 993; SSE41: # %bb.0: # %entry 994; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 995; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 996; SSE41-NEXT: retq 997; 998; AVX1-LABEL: load_zext_4i8_to_4i64: 999; AVX1: # %bb.0: # %entry 1000; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1001; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1002; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1003; AVX1-NEXT: retq 1004; 1005; AVX2-LABEL: load_zext_4i8_to_4i64: 1006; AVX2: # %bb.0: # %entry 1007; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1008; AVX2-NEXT: retq 1009; 1010; AVX512-LABEL: load_zext_4i8_to_4i64: 
1011; AVX512: # %bb.0: # %entry 1012; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1013; AVX512-NEXT: retq 1014entry: 1015 %X = load <4 x i8>, <4 x i8>* %ptr 1016 %Y = zext <4 x i8> %X to <4 x i64> 1017 ret <4 x i64> %Y 1018} 1019 1020define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) { 1021; SSE2-LABEL: load_zext_8i8_to_8i16: 1022; SSE2: # %bb.0: # %entry 1023; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1024; SSE2-NEXT: pxor %xmm1, %xmm1 1025; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1026; SSE2-NEXT: retq 1027; 1028; SSSE3-LABEL: load_zext_8i8_to_8i16: 1029; SSSE3: # %bb.0: # %entry 1030; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1031; SSSE3-NEXT: pxor %xmm1, %xmm1 1032; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1033; SSSE3-NEXT: retq 1034; 1035; SSE41-LABEL: load_zext_8i8_to_8i16: 1036; SSE41: # %bb.0: # %entry 1037; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1038; SSE41-NEXT: retq 1039; 1040; AVX-LABEL: load_zext_8i8_to_8i16: 1041; AVX: # %bb.0: # %entry 1042; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1043; AVX-NEXT: retq 1044entry: 1045 %X = load <8 x i8>, <8 x i8>* %ptr 1046 %Y = zext <8 x i8> %X to <8 x i16> 1047 ret <8 x i16> %Y 1048} 1049 1050define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) { 1051; SSE2-LABEL: load_zext_8i8_to_8i32: 1052; SSE2: # %bb.0: # %entry 1053; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1054; SSE2-NEXT: pxor %xmm2, %xmm2 1055; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1056; SSE2-NEXT: movdqa %xmm1, %xmm0 1057; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1058; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1059; SSE2-NEXT: retq 1060; 1061; SSSE3-LABEL: load_zext_8i8_to_8i32: 1062; SSSE3: # %bb.0: # %entry 1063; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1064; SSSE3-NEXT: pxor %xmm2, %xmm2 1065; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1066; SSSE3-NEXT: movdqa %xmm1, %xmm0 1067; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1068; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1069; SSSE3-NEXT: retq 1070; 1071; SSE41-LABEL: load_zext_8i8_to_8i32: 1072; SSE41: # %bb.0: # %entry 1073; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1074; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1075; SSE41-NEXT: retq 1076; 1077; AVX1-LABEL: load_zext_8i8_to_8i32: 1078; AVX1: # %bb.0: # %entry 1079; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1080; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1081; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1082; AVX1-NEXT: retq 1083; 1084; AVX2-LABEL: load_zext_8i8_to_8i32: 1085; AVX2: # %bb.0: # %entry 1086; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1087; AVX2-NEXT: retq 1088; 1089; AVX512-LABEL: load_zext_8i8_to_8i32: 1090; AVX512: # %bb.0: # %entry 1091; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1092; AVX512-NEXT: retq 1093entry: 1094 %X = load <8 x i8>, <8 x i8>* %ptr 1095 %Y = zext <8 x i8> %X to <8 x i32> 1096 ret <8 x i32> %Y 1097} 1098 1099define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) { 1100; SSE2-LABEL: load_zext_16i8_to_8i32: 1101; SSE2: # %bb.0: # %entry 1102; SSE2-NEXT: movdqa (%rdi), %xmm1 1103; SSE2-NEXT: pxor %xmm2, %xmm2 1104; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1105; SSE2-NEXT: movdqa %xmm1, %xmm0 1106; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1107; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1108; SSE2-NEXT: retq 1109; 1110; SSSE3-LABEL: load_zext_16i8_to_8i32: 1111; SSSE3: # %bb.0: # %entry 1112; SSSE3-NEXT: movdqa (%rdi), %xmm1 1113; SSSE3-NEXT: pxor %xmm2, %xmm2 1114; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1115; SSSE3-NEXT: movdqa %xmm1, %xmm0 1116; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1117; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1118; SSSE3-NEXT: retq 1119; 1120; SSE41-LABEL: load_zext_16i8_to_8i32: 1121; SSE41: # %bb.0: # %entry 1122; 
SSE41-NEXT: movdqa (%rdi), %xmm1 1123; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1124; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] 1125; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1126; SSE41-NEXT: retq 1127; 1128; AVX1-LABEL: load_zext_16i8_to_8i32: 1129; AVX1: # %bb.0: # %entry 1130; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1131; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1132; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1133; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1134; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1135; AVX1-NEXT: retq 1136; 1137; AVX2-LABEL: load_zext_16i8_to_8i32: 1138; AVX2: # %bb.0: # %entry 1139; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1140; AVX2-NEXT: retq 1141; 1142; AVX512-LABEL: load_zext_16i8_to_8i32: 1143; AVX512: # %bb.0: # %entry 1144; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1145; AVX512-NEXT: retq 1146entry: 1147 %X = load <16 x i8>, <16 x i8>* %ptr 1148 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1149 %Z = zext <8 x i8> %Y to <8 x i32> 1150 ret <8 x i32> %Z 1151} 1152 1153define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) { 1154; SSE2-LABEL: load_zext_8i8_to_8i64: 1155; SSE2: # %bb.0: # %entry 1156; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 1157; SSE2-NEXT: pxor 
%xmm4, %xmm4 1158; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1159; SSE2-NEXT: movdqa %xmm3, %xmm1 1160; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1161; SSE2-NEXT: movdqa %xmm1, %xmm0 1162; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 1163; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1164; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1165; SSE2-NEXT: movdqa %xmm3, %xmm2 1166; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1167; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 1168; SSE2-NEXT: retq 1169; 1170; SSSE3-LABEL: load_zext_8i8_to_8i64: 1171; SSSE3: # %bb.0: # %entry 1172; SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero 1173; SSSE3-NEXT: movdqa %xmm3, %xmm0 1174; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1175; SSSE3-NEXT: movdqa %xmm3, %xmm1 1176; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 1177; SSSE3-NEXT: movdqa %xmm3, %xmm2 1178; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[4],zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero,zero,zero 1179; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[6],zero,zero,zero,zero,zero,zero,zero,xmm3[7],zero,zero,zero,zero,zero,zero,zero 1180; SSSE3-NEXT: retq 1181; 1182; SSE41-LABEL: load_zext_8i8_to_8i64: 1183; SSE41: # %bb.0: # %entry 1184; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1185; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1186; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1187; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1188; SSE41-NEXT: retq 1189; 1190; AVX1-LABEL: load_zext_8i8_to_8i64: 1191; AVX1: # %bb.0: # %entry 1192; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1193; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1194; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1195; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1196; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1197; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1198; AVX1-NEXT: retq 1199; 1200; AVX2-LABEL: load_zext_8i8_to_8i64: 1201; AVX2: # %bb.0: # %entry 1202; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1203; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1204; AVX2-NEXT: retq 1205; 1206; AVX512-LABEL: load_zext_8i8_to_8i64: 1207; AVX512: # %bb.0: # %entry 1208; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 1209; AVX512-NEXT: retq 1210entry: 1211 %X = load <8 x i8>, <8 x i8>* 
%ptr 1212 %Y = zext <8 x i8> %X to <8 x i64> 1213 ret <8 x i64> %Y 1214} 1215 1216define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) { 1217; SSE2-LABEL: load_zext_16i8_to_16i16: 1218; SSE2: # %bb.0: # %entry 1219; SSE2-NEXT: movdqa (%rdi), %xmm1 1220; SSE2-NEXT: pxor %xmm2, %xmm2 1221; SSE2-NEXT: movdqa %xmm1, %xmm0 1222; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1223; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1224; SSE2-NEXT: retq 1225; 1226; SSSE3-LABEL: load_zext_16i8_to_16i16: 1227; SSSE3: # %bb.0: # %entry 1228; SSSE3-NEXT: movdqa (%rdi), %xmm1 1229; SSSE3-NEXT: pxor %xmm2, %xmm2 1230; SSSE3-NEXT: movdqa %xmm1, %xmm0 1231; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1232; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1233; SSSE3-NEXT: retq 1234; 1235; SSE41-LABEL: load_zext_16i8_to_16i16: 1236; SSE41: # %bb.0: # %entry 1237; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1238; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1239; SSE41-NEXT: retq 1240; 1241; AVX1-LABEL: load_zext_16i8_to_16i16: 1242; AVX1: # %bb.0: # %entry 1243; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1244; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1245; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1246; AVX1-NEXT: retq 1247; 1248; AVX2-LABEL: load_zext_16i8_to_16i16: 1249; AVX2: # %bb.0: # %entry 1250; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1251; AVX2-NEXT: retq 1252; 1253; AVX512-LABEL: load_zext_16i8_to_16i16: 1254; AVX512: # %bb.0: # %entry 1255; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1256; AVX512-NEXT: retq 1257entry: 1258 %X = load <16 x i8>, <16 x i8>* %ptr 1259 %Y = zext <16 x i8> %X to <16 x i16> 1260 ret <16 x i16> %Y 1261} 1262 1263define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) { 1264; SSE2-LABEL: load_zext_2i16_to_2i64: 1265; SSE2: # %bb.0: # %entry 1266; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1267; SSE2-NEXT: pxor %xmm1, %xmm1 1268; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1269; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1270; SSE2-NEXT: retq 1271; 1272; SSSE3-LABEL: load_zext_2i16_to_2i64: 1273; SSSE3: # %bb.0: # %entry 1274; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1275; SSSE3-NEXT: pxor %xmm1, %xmm1 1276; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1277; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1278; SSSE3-NEXT: retq 1279; 1280; SSE41-LABEL: load_zext_2i16_to_2i64: 1281; SSE41: # %bb.0: # %entry 1282; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1283; SSE41-NEXT: 
retq 1284; 1285; AVX-LABEL: load_zext_2i16_to_2i64: 1286; AVX: # %bb.0: # %entry 1287; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1288; AVX-NEXT: retq 1289entry: 1290 %X = load <2 x i16>, <2 x i16>* %ptr 1291 %Y = zext <2 x i16> %X to <2 x i64> 1292 ret <2 x i64> %Y 1293} 1294 1295define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) { 1296; SSE2-LABEL: load_zext_4i16_to_4i32: 1297; SSE2: # %bb.0: # %entry 1298; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1299; SSE2-NEXT: pxor %xmm1, %xmm1 1300; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1301; SSE2-NEXT: retq 1302; 1303; SSSE3-LABEL: load_zext_4i16_to_4i32: 1304; SSSE3: # %bb.0: # %entry 1305; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1306; SSSE3-NEXT: pxor %xmm1, %xmm1 1307; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1308; SSSE3-NEXT: retq 1309; 1310; SSE41-LABEL: load_zext_4i16_to_4i32: 1311; SSE41: # %bb.0: # %entry 1312; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1313; SSE41-NEXT: retq 1314; 1315; AVX-LABEL: load_zext_4i16_to_4i32: 1316; AVX: # %bb.0: # %entry 1317; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1318; AVX-NEXT: retq 1319entry: 1320 %X = load <4 x i16>, <4 x i16>* %ptr 1321 %Y = zext <4 x i16> %X to <4 x i32> 1322 ret <4 x i32> %Y 1323} 1324 1325define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) { 1326; SSE2-LABEL: load_zext_4i16_to_4i64: 1327; SSE2: # %bb.0: # %entry 1328; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1329; SSE2-NEXT: pxor %xmm2, %xmm2 1330; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1331; SSE2-NEXT: movdqa %xmm1, %xmm0 1332; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1333; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1334; 
SSE2-NEXT: retq 1335; 1336; SSSE3-LABEL: load_zext_4i16_to_4i64: 1337; SSSE3: # %bb.0: # %entry 1338; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1339; SSSE3-NEXT: pxor %xmm2, %xmm2 1340; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1341; SSSE3-NEXT: movdqa %xmm1, %xmm0 1342; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1343; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1344; SSSE3-NEXT: retq 1345; 1346; SSE41-LABEL: load_zext_4i16_to_4i64: 1347; SSE41: # %bb.0: # %entry 1348; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1349; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1350; SSE41-NEXT: retq 1351; 1352; AVX1-LABEL: load_zext_4i16_to_4i64: 1353; AVX1: # %bb.0: # %entry 1354; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1355; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1356; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1357; AVX1-NEXT: retq 1358; 1359; AVX2-LABEL: load_zext_4i16_to_4i64: 1360; AVX2: # %bb.0: # %entry 1361; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1362; AVX2-NEXT: retq 1363; 1364; AVX512-LABEL: load_zext_4i16_to_4i64: 1365; AVX512: # %bb.0: # %entry 1366; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1367; AVX512-NEXT: retq 1368entry: 1369 %X = load <4 x i16>, <4 x i16>* %ptr 1370 %Y = zext <4 x i16> %X to <4 x i64> 1371 ret <4 x i64> %Y 1372} 1373 1374define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) { 1375; SSE2-LABEL: load_zext_8i16_to_8i32: 1376; SSE2: # %bb.0: # %entry 1377; SSE2-NEXT: movdqa (%rdi), %xmm1 1378; SSE2-NEXT: pxor %xmm2, %xmm2 1379; SSE2-NEXT: movdqa %xmm1, %xmm0 1380; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1381; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1382; SSE2-NEXT: retq 1383; 1384; SSSE3-LABEL: load_zext_8i16_to_8i32: 1385; SSSE3: # %bb.0: # %entry 1386; SSSE3-NEXT: movdqa (%rdi), %xmm1 1387; SSSE3-NEXT: pxor %xmm2, %xmm2 1388; SSSE3-NEXT: movdqa %xmm1, %xmm0 1389; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1390; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1391; SSSE3-NEXT: retq 1392; 1393; SSE41-LABEL: load_zext_8i16_to_8i32: 1394; SSE41: # %bb.0: # %entry 1395; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1396; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1397; SSE41-NEXT: retq 1398; 1399; AVX1-LABEL: load_zext_8i16_to_8i32: 1400; AVX1: # %bb.0: # %entry 1401; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1402; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1403; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1404; AVX1-NEXT: retq 1405; 1406; AVX2-LABEL: load_zext_8i16_to_8i32: 1407; AVX2: # %bb.0: # %entry 1408; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1409; AVX2-NEXT: retq 1410; 1411; AVX512-LABEL: load_zext_8i16_to_8i32: 1412; AVX512: # %bb.0: # %entry 1413; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1414; AVX512-NEXT: retq 1415entry: 1416 %X = load <8 x i16>, <8 x i16>* %ptr 1417 %Y = zext <8 x i16> %X to <8 x i32> 1418 ret <8 x i32> %Y 1419} 1420 1421define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) { 1422; SSE2-LABEL: load_zext_2i32_to_2i64: 1423; SSE2: # %bb.0: # %entry 1424; 
SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1425; SSE2-NEXT: xorps %xmm1, %xmm1 1426; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1427; SSE2-NEXT: retq 1428; 1429; SSSE3-LABEL: load_zext_2i32_to_2i64: 1430; SSSE3: # %bb.0: # %entry 1431; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 1432; SSSE3-NEXT: xorps %xmm1, %xmm1 1433; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1434; SSSE3-NEXT: retq 1435; 1436; SSE41-LABEL: load_zext_2i32_to_2i64: 1437; SSE41: # %bb.0: # %entry 1438; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1439; SSE41-NEXT: retq 1440; 1441; AVX-LABEL: load_zext_2i32_to_2i64: 1442; AVX: # %bb.0: # %entry 1443; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1444; AVX-NEXT: retq 1445entry: 1446 %X = load <2 x i32>, <2 x i32>* %ptr 1447 %Y = zext <2 x i32> %X to <2 x i64> 1448 ret <2 x i64> %Y 1449} 1450 1451define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) { 1452; SSE2-LABEL: load_zext_4i32_to_4i64: 1453; SSE2: # %bb.0: # %entry 1454; SSE2-NEXT: movaps (%rdi), %xmm1 1455; SSE2-NEXT: xorps %xmm2, %xmm2 1456; SSE2-NEXT: movaps %xmm1, %xmm0 1457; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1458; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1459; SSE2-NEXT: retq 1460; 1461; SSSE3-LABEL: load_zext_4i32_to_4i64: 1462; SSSE3: # %bb.0: # %entry 1463; SSSE3-NEXT: movaps (%rdi), %xmm1 1464; SSSE3-NEXT: xorps %xmm2, %xmm2 1465; SSSE3-NEXT: movaps %xmm1, %xmm0 1466; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1467; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1468; SSSE3-NEXT: retq 1469; 1470; SSE41-LABEL: load_zext_4i32_to_4i64: 1471; SSE41: # %bb.0: # %entry 1472; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1473; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1474; SSE41-NEXT: retq 1475; 1476; AVX1-LABEL: load_zext_4i32_to_4i64: 1477; AVX1: # %bb.0: # 
%entry 1478; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1479; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1480; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1481; AVX1-NEXT: retq 1482; 1483; AVX2-LABEL: load_zext_4i32_to_4i64: 1484; AVX2: # %bb.0: # %entry 1485; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1486; AVX2-NEXT: retq 1487; 1488; AVX512-LABEL: load_zext_4i32_to_4i64: 1489; AVX512: # %bb.0: # %entry 1490; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1491; AVX512-NEXT: retq 1492entry: 1493 %X = load <4 x i32>, <4 x i32>* %ptr 1494 %Y = zext <4 x i32> %X to <4 x i64> 1495 ret <4 x i64> %Y 1496} 1497 1498define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { 1499; SSE2-LABEL: zext_8i8_to_8i32: 1500; SSE2: # %bb.0: # %entry 1501; SSE2-NEXT: movdqa %xmm0, %xmm1 1502; SSE2-NEXT: pxor %xmm2, %xmm2 1503; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1504; SSE2-NEXT: movdqa %xmm1, %xmm0 1505; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1506; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1507; SSE2-NEXT: retq 1508; 1509; SSSE3-LABEL: zext_8i8_to_8i32: 1510; SSSE3: # %bb.0: # %entry 1511; SSSE3-NEXT: movdqa %xmm0, %xmm1 1512; SSSE3-NEXT: pxor %xmm2, %xmm2 1513; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1514; SSSE3-NEXT: movdqa %xmm1, %xmm0 1515; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1516; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1517; SSSE3-NEXT: retq 1518; 1519; SSE41-LABEL: 
zext_8i8_to_8i32: 1520; SSE41: # %bb.0: # %entry 1521; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1522; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1523; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1524; SSE41-NEXT: movdqa %xmm2, %xmm0 1525; SSE41-NEXT: retq 1526; 1527; AVX1-LABEL: zext_8i8_to_8i32: 1528; AVX1: # %bb.0: # %entry 1529; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1530; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1531; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1532; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1533; AVX1-NEXT: retq 1534; 1535; AVX2-LABEL: zext_8i8_to_8i32: 1536; AVX2: # %bb.0: # %entry 1537; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1538; AVX2-NEXT: retq 1539; 1540; AVX512-LABEL: zext_8i8_to_8i32: 1541; AVX512: # %bb.0: # %entry 1542; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1543; AVX512-NEXT: retq 1544entry: 1545 %t = zext <8 x i8> %z to <8 x i32> 1546 ret <8 x i32> %t 1547} 1548 1549define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 1550; SSE2-LABEL: shuf_zext_8i16_to_8i32: 1551; SSE2: # %bb.0: # %entry 1552; SSE2-NEXT: movdqa %xmm0, %xmm1 1553; SSE2-NEXT: pxor %xmm2, %xmm2 1554; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1555; SSE2-NEXT: 
punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1556; SSE2-NEXT: retq 1557; 1558; SSSE3-LABEL: shuf_zext_8i16_to_8i32: 1559; SSSE3: # %bb.0: # %entry 1560; SSSE3-NEXT: movdqa %xmm0, %xmm1 1561; SSSE3-NEXT: pxor %xmm2, %xmm2 1562; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1563; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1564; SSSE3-NEXT: retq 1565; 1566; SSE41-LABEL: shuf_zext_8i16_to_8i32: 1567; SSE41: # %bb.0: # %entry 1568; SSE41-NEXT: movdqa %xmm0, %xmm1 1569; SSE41-NEXT: pxor %xmm2, %xmm2 1570; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1571; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1572; SSE41-NEXT: retq 1573; 1574; AVX1-LABEL: shuf_zext_8i16_to_8i32: 1575; AVX1: # %bb.0: # %entry 1576; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1577; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1578; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1579; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1580; AVX1-NEXT: retq 1581; 1582; AVX2-LABEL: shuf_zext_8i16_to_8i32: 1583; AVX2: # %bb.0: # %entry 1584; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1585; AVX2-NEXT: retq 1586; 1587; AVX512-LABEL: shuf_zext_8i16_to_8i32: 1588; AVX512: # %bb.0: # %entry 1589; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1590; AVX512-NEXT: retq 1591entry: 1592 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8> 1593 %Z = 
bitcast <16 x i16> %B to <8 x i32> 1594 ret <8 x i32> %Z 1595} 1596 1597define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1598; SSE2-LABEL: shuf_zext_4i32_to_4i64: 1599; SSE2: # %bb.0: # %entry 1600; SSE2-NEXT: movaps %xmm0, %xmm1 1601; SSE2-NEXT: xorps %xmm2, %xmm2 1602; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1603; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1604; SSE2-NEXT: retq 1605; 1606; SSSE3-LABEL: shuf_zext_4i32_to_4i64: 1607; SSSE3: # %bb.0: # %entry 1608; SSSE3-NEXT: movaps %xmm0, %xmm1 1609; SSSE3-NEXT: xorps %xmm2, %xmm2 1610; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1611; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1612; SSSE3-NEXT: retq 1613; 1614; SSE41-LABEL: shuf_zext_4i32_to_4i64: 1615; SSE41: # %bb.0: # %entry 1616; SSE41-NEXT: movdqa %xmm0, %xmm1 1617; SSE41-NEXT: pxor %xmm2, %xmm2 1618; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1619; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1620; SSE41-NEXT: retq 1621; 1622; AVX1-LABEL: shuf_zext_4i32_to_4i64: 1623; AVX1: # %bb.0: # %entry 1624; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1625; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1626; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1627; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1628; AVX1-NEXT: retq 1629; 1630; AVX2-LABEL: shuf_zext_4i32_to_4i64: 1631; AVX2: # %bb.0: # %entry 1632; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1633; AVX2-NEXT: retq 1634; 1635; AVX512-LABEL: shuf_zext_4i32_to_4i64: 1636; AVX512: # %bb.0: # %entry 1637; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1638; AVX512-NEXT: retq 1639entry: 1640 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, 
i32 4> 1641 %Z = bitcast <8 x i32> %B to <4 x i64> 1642 ret <4 x i64> %Z 1643} 1644 1645define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) { 1646; SSE2-LABEL: shuf_zext_8i8_to_8i32: 1647; SSE2: # %bb.0: # %entry 1648; SSE2-NEXT: movdqa %xmm0, %xmm1 1649; SSE2-NEXT: pxor %xmm2, %xmm2 1650; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1651; SSE2-NEXT: movdqa %xmm1, %xmm0 1652; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1653; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1654; SSE2-NEXT: retq 1655; 1656; SSSE3-LABEL: shuf_zext_8i8_to_8i32: 1657; SSSE3: # %bb.0: # %entry 1658; SSSE3-NEXT: movdqa %xmm0, %xmm1 1659; SSSE3-NEXT: pxor %xmm2, %xmm2 1660; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1661; SSSE3-NEXT: movdqa %xmm1, %xmm0 1662; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1663; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1664; SSSE3-NEXT: retq 1665; 1666; SSE41-LABEL: shuf_zext_8i8_to_8i32: 1667; SSE41: # %bb.0: # %entry 1668; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1669; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1670; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1671; SSE41-NEXT: movdqa %xmm2, %xmm0 1672; SSE41-NEXT: retq 1673; 1674; AVX1-LABEL: shuf_zext_8i8_to_8i32: 1675; AVX1: # %bb.0: # %entry 1676; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1677; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1678; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1679; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1680; AVX1-NEXT: retq 1681; 1682; AVX2-LABEL: shuf_zext_8i8_to_8i32: 1683; AVX2: # %bb.0: # %entry 1684; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1685; AVX2-NEXT: retq 1686; 1687; AVX512-LABEL: shuf_zext_8i8_to_8i32: 1688; AVX512: # %bb.0: # %entry 1689; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1690; AVX512-NEXT: retq 1691entry: 1692 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8> 1693 %Z = bitcast <32 x i8> %B to <8 x i32> 1694 ret <8 x i32> %Z 1695} 1696 1697define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp { 1698; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6: 1699; SSE2: # %bb.0: # %entry 1700; SSE2-NEXT: pxor %xmm1, %xmm1 1701; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1702; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1703; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1704; SSE2-NEXT: retq 1705; 
1706; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6: 1707; SSSE3: # %bb.0: # %entry 1708; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1709; SSSE3-NEXT: retq 1710; 1711; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6: 1712; SSE41: # %bb.0: # %entry 1713; SSE41-NEXT: psrlq $48, %xmm0 1714; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1715; SSE41-NEXT: retq 1716; 1717; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6: 1718; AVX1: # %bb.0: # %entry 1719; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 1720; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1721; AVX1-NEXT: retq 1722; 1723; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6: 1724; AVX2-SLOW: # %bb.0: # %entry 1725; AVX2-SLOW-NEXT: vpsrlq $48, %xmm0, %xmm0 1726; AVX2-SLOW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1727; AVX2-SLOW-NEXT: retq 1728; 1729; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6: 1730; AVX2-FAST: # %bb.0: # %entry 1731; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1732; AVX2-FAST-NEXT: retq 1733; 1734; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6: 1735; AVX512F: # %bb.0: # %entry 1736; AVX512F-NEXT: vpsrlq $48, %xmm0, %xmm0 1737; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1738; AVX512F-NEXT: retq 1739; 1740; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6: 1741; AVX512BW: # %bb.0: # %entry 1742; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1743; AVX512BW-NEXT: retq 1744entry: 1745 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 
16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1746 %Z = bitcast <16 x i8> %B to <2 x i64> 1747 ret <2 x i64> %Z 1748} 1749 1750define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp { 1751; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1752; SSE2: # %bb.0: # %entry 1753; SSE2-NEXT: movdqa %xmm0, %xmm1 1754; SSE2-NEXT: psrlq $8, %xmm1 1755; SSE2-NEXT: pxor %xmm2, %xmm2 1756; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1757; SSE2-NEXT: movdqa %xmm1, %xmm0 1758; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1759; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1760; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1761; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1762; SSE2-NEXT: retq 1763; 1764; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11: 1765; SSSE3: # %bb.0: # %entry 1766; SSSE3-NEXT: movdqa %xmm0, %xmm1 1767; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero 1768; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero 1769; SSSE3-NEXT: retq 1770; 1771; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11: 1772; SSE41: # %bb.0: # %entry 1773; SSE41-NEXT: movdqa %xmm0, %xmm1 1774; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1775; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1776; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1777; SSE41-NEXT: 
pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1778; SSE41-NEXT: movdqa %xmm2, %xmm0 1779; SSE41-NEXT: retq 1780; 1781; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11: 1782; AVX1: # %bb.0: # %entry 1783; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1784; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1785; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1786; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1787; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1788; AVX1-NEXT: retq 1789; 1790; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1791; AVX2: # %bb.0: # %entry 1792; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1793; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1794; AVX2-NEXT: retq 1795; 1796; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11: 1797; AVX512: # %bb.0: # %entry 1798; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1799; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1800; AVX512-NEXT: retq 1801entry: 1802 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 
16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1803 %Z = bitcast <32 x i8> %B to <4 x i64> 1804 ret <4 x i64> %Z 1805} 1806 1807define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp { 1808; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6: 1809; SSE2: # %bb.0: # %entry 1810; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1811; SSE2-NEXT: pxor %xmm1, %xmm1 1812; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1813; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1814; SSE2-NEXT: retq 1815; 1816; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6: 1817; SSSE3: # %bb.0: # %entry 1818; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1819; SSSE3-NEXT: retq 1820; 1821; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6: 1822; SSE41: # %bb.0: # %entry 1823; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1824; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1825; SSE41-NEXT: retq 1826; 1827; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6: 1828; AVX1: # %bb.0: # %entry 1829; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1830; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1831; AVX1-NEXT: retq 1832; 1833; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6: 1834; AVX2-SLOW: # %bb.0: # %entry 1835; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1836; AVX2-SLOW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1837; AVX2-SLOW-NEXT: retq 1838; 1839; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6: 1840; AVX2-FAST: # %bb.0: # %entry 1841; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1842; AVX2-FAST-NEXT: retq 1843; 1844; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6: 1845; AVX512F: # %bb.0: # %entry 1846; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1847; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1848; AVX512F-NEXT: retq 1849; 1850; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6: 1851; AVX512BW: # %bb.0: # %entry 1852; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1853; AVX512BW-NEXT: retq 1854entry: 1855 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8> 1856 %Z = bitcast <8 x i16> %B to <2 x i64> 1857 ret <2 x i64> %Z 1858} 1859 1860define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp { 1861; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1862; SSE2: # %bb.0: # %entry 1863; SSE2-NEXT: movdqa %xmm0, %xmm1 1864; SSE2-NEXT: pxor %xmm2, %xmm2 1865; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1866; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1867; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1868; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1869; SSE2-NEXT: retq 1870; 1871; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2: 1872; SSSE3: # %bb.0: # %entry 1873; SSSE3-NEXT: movdqa %xmm0, %xmm1 1874; SSSE3-NEXT: pxor %xmm2, %xmm2 1875; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1876; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1877; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1878; SSSE3-NEXT: 
punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1879; SSSE3-NEXT: retq 1880; 1881; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2: 1882; SSE41: # %bb.0: # %entry 1883; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1884; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1885; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1886; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1887; SSE41-NEXT: movdqa %xmm2, %xmm0 1888; SSE41-NEXT: retq 1889; 1890; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2: 1891; AVX1: # %bb.0: # %entry 1892; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1893; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1894; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1895; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1896; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1897; AVX1-NEXT: retq 1898; 1899; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1900; AVX2: # %bb.0: # %entry 1901; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1902; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1903; AVX2-NEXT: retq 1904; 1905; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: 1906; AVX512: # %bb.0: # %entry 1907; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1908; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1909; AVX512-NEXT: retq 1910entry: 1911 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8> 1912 %Z = bitcast <16 x i16> %B to <4 x i64> 1913 ret <4 x i64> %Z 1914} 1915 1916define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp { 1917; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1: 
1918; SSE2: # %bb.0: # %entry 1919; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1920; SSE2-NEXT: pxor %xmm1, %xmm1 1921; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1922; SSE2-NEXT: retq 1923; 1924; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1: 1925; SSSE3: # %bb.0: # %entry 1926; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1927; SSSE3-NEXT: pxor %xmm1, %xmm1 1928; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1929; SSSE3-NEXT: retq 1930; 1931; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1: 1932; SSE41: # %bb.0: # %entry 1933; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1934; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1935; SSE41-NEXT: retq 1936; 1937; AVX1-LABEL: shuf_zext_8i16_to_4i32_offset1: 1938; AVX1: # %bb.0: # %entry 1939; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1940; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1941; AVX1-NEXT: retq 1942; 1943; AVX2-SLOW-LABEL: shuf_zext_8i16_to_4i32_offset1: 1944; AVX2-SLOW: # %bb.0: # %entry 1945; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1946; AVX2-SLOW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1947; AVX2-SLOW-NEXT: retq 1948; 1949; AVX2-FAST-LABEL: shuf_zext_8i16_to_4i32_offset1: 1950; AVX2-FAST: # %bb.0: # %entry 1951; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero 1952; AVX2-FAST-NEXT: retq 1953; 1954; AVX512F-LABEL: shuf_zext_8i16_to_4i32_offset1: 1955; AVX512F: # %bb.0: # %entry 1956; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1957; AVX512F-NEXT: 
vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1958; AVX512F-NEXT: retq 1959; 1960; AVX512BW-LABEL: shuf_zext_8i16_to_4i32_offset1: 1961; AVX512BW: # %bb.0: # %entry 1962; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,xmm0[4,5],zero,zero,xmm0[6,7],zero,zero,xmm0[8,9],zero,zero 1963; AVX512BW-NEXT: retq 1964entry: 1965 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8> 1966 %Z = bitcast <8 x i16> %B to <4 x i32> 1967 ret <4 x i32> %Z 1968} 1969 1970define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp { 1971; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3: 1972; SSE2: # %bb.0: # %entry 1973; SSE2-NEXT: movdqa %xmm0, %xmm1 1974; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1975; SSE2-NEXT: pxor %xmm2, %xmm2 1976; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1977; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1978; SSE2-NEXT: retq 1979; 1980; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3: 1981; SSSE3: # %bb.0: # %entry 1982; SSSE3-NEXT: movdqa %xmm0, %xmm1 1983; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1984; SSSE3-NEXT: pxor %xmm2, %xmm2 1985; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1986; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1987; SSSE3-NEXT: retq 1988; 1989; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3: 1990; SSE41: # %bb.0: # %entry 1991; SSE41-NEXT: movdqa %xmm0, %xmm1 1992; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1993; SSE41-NEXT: pxor %xmm2, %xmm2 1994; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = 
xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1995; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1996; SSE41-NEXT: retq 1997; 1998; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3: 1999; AVX1: # %bb.0: # %entry 2000; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 2001; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2002; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2003; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2004; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2005; AVX1-NEXT: retq 2006; 2007; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3: 2008; AVX2: # %bb.0: # %entry 2009; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 2010; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2011; AVX2-NEXT: retq 2012; 2013; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3: 2014; AVX512: # %bb.0: # %entry 2015; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 2016; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2017; AVX512-NEXT: retq 2018entry: 2019 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8> 2020 %Z = bitcast <16 x i16> %B to <8 x i32> 2021 ret <8 x i32> %Z 2022} 2023 2024define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp { 2025; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8: 2026; SSE2: # %bb.0: # %entry 2027; SSE2-NEXT: pxor %xmm2, %xmm2 2028; SSE2-NEXT: movdqa %xmm1, 
%xmm0 2029; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2030; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2031; SSE2-NEXT: retq 2032; 2033; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8: 2034; SSSE3: # %bb.0: # %entry 2035; SSSE3-NEXT: pxor %xmm2, %xmm2 2036; SSSE3-NEXT: movdqa %xmm1, %xmm0 2037; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2038; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2039; SSSE3-NEXT: retq 2040; 2041; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8: 2042; SSE41: # %bb.0: # %entry 2043; SSE41-NEXT: pxor %xmm2, %xmm2 2044; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2045; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2046; SSE41-NEXT: retq 2047; 2048; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8: 2049; AVX1: # %bb.0: # %entry 2050; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2051; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2052; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2053; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2054; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2055; AVX1-NEXT: retq 2056; 2057; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8: 2058; AVX2: # %bb.0: # %entry 2059; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2060; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2061; AVX2-NEXT: retq 2062; 2063; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8: 2064; AVX512: # %bb.0: # %entry 2065; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2066; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2067; AVX512-NEXT: retq 2068entry: 2069 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16> 2070 %Z = bitcast <16 x i16> %B to <8 x i32> 2071 ret <8 x i32> %Z 2072} 2073 2074define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp { 2075; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2: 2076; SSE: # %bb.0: # %entry 2077; SSE-NEXT: xorps %xmm1, %xmm1 2078; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2079; SSE-NEXT: retq 2080; 2081; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2: 2082; AVX: # %bb.0: # %entry 2083; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 2084; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2085; AVX-NEXT: retq 2086entry: 2087 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4> 2088 %Z = bitcast <4 x i32> %B to <2 x i64> 2089 ret <2 x i64> %Z 2090} 2091 2092define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp { 2093; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1: 2094; SSE2: # %bb.0: # %entry 2095; SSE2-NEXT: movdqa %xmm0, %xmm1 2096; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 2097; SSE2-NEXT: pand %xmm1, %xmm0 2098; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2099; SSE2-NEXT: retq 2100; 2101; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1: 2102; SSSE3: # %bb.0: # %entry 2103; SSSE3-NEXT: movdqa %xmm0, %xmm1 2104; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 2105; SSSE3-NEXT: pand %xmm1, %xmm0 2106; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2107; SSSE3-NEXT: retq 2108; 2109; SSE41-LABEL: 
shuf_zext_4i32_to_4i64_offset1: 2110; SSE41: # %bb.0: # %entry 2111; SSE41-NEXT: movdqa %xmm0, %xmm1 2112; SSE41-NEXT: pxor %xmm0, %xmm0 2113; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 2114; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2115; SSE41-NEXT: retq 2116; 2117; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1: 2118; AVX1: # %bb.0: # %entry 2119; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2120; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2121; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2122; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2123; AVX1-NEXT: retq 2124; 2125; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1: 2126; AVX2: # %bb.0: # %entry 2127; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 2128; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2129; AVX2-NEXT: retq 2130; 2131; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1: 2132; AVX512: # %bb.0: # %entry 2133; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 2134; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2135; AVX512-NEXT: retq 2136entry: 2137 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4> 2138 %Z = bitcast <8 x i32> %B to <4 x i64> 2139 ret <4 x i64> %Z 2140} 2141 2142define <32 x i32> @zext_32i8_to_32i32(<32 x i8> %x) { 2143; SSE2-LABEL: zext_32i8_to_32i32: 2144; SSE2: # %bb.0: 2145; SSE2-NEXT: movq %rdi, %rax 2146; SSE2-NEXT: pxor %xmm2, %xmm2 2147; SSE2-NEXT: movdqa %xmm0, %xmm3 2148; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2149; SSE2-NEXT: movdqa %xmm3, %xmm8 2150; SSE2-NEXT: punpcklwd {{.*#+}} xmm8 = 
xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3] 2151; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2152; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 2153; SSE2-NEXT: movdqa %xmm0, %xmm5 2154; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] 2155; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2156; SSE2-NEXT: movdqa %xmm1, %xmm6 2157; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2158; SSE2-NEXT: movdqa %xmm6, %xmm7 2159; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3] 2160; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2161; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 2162; SSE2-NEXT: movdqa %xmm1, %xmm4 2163; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2164; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2165; SSE2-NEXT: movdqa %xmm1, 112(%rdi) 2166; SSE2-NEXT: movdqa %xmm4, 96(%rdi) 2167; SSE2-NEXT: movdqa %xmm6, 80(%rdi) 2168; SSE2-NEXT: movdqa %xmm7, 64(%rdi) 2169; SSE2-NEXT: movdqa %xmm0, 48(%rdi) 2170; SSE2-NEXT: movdqa %xmm5, 32(%rdi) 2171; SSE2-NEXT: movdqa %xmm3, 16(%rdi) 2172; SSE2-NEXT: movdqa %xmm8, (%rdi) 2173; SSE2-NEXT: retq 2174; 2175; SSSE3-LABEL: zext_32i8_to_32i32: 2176; SSSE3: # %bb.0: 2177; SSSE3-NEXT: movq %rdi, %rax 2178; SSSE3-NEXT: pxor %xmm2, %xmm2 2179; SSSE3-NEXT: 
movdqa %xmm0, %xmm3 2180; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2181; SSSE3-NEXT: movdqa %xmm3, %xmm8 2182; SSSE3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm2[0],xmm8[1],xmm2[1],xmm8[2],xmm2[2],xmm8[3],xmm2[3] 2183; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2184; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] 2185; SSSE3-NEXT: movdqa %xmm0, %xmm5 2186; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] 2187; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2188; SSSE3-NEXT: movdqa %xmm1, %xmm6 2189; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2190; SSSE3-NEXT: movdqa %xmm6, %xmm7 2191; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm2[0],xmm7[1],xmm2[1],xmm7[2],xmm2[2],xmm7[3],xmm2[3] 2192; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7] 2193; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 2194; SSSE3-NEXT: movdqa %xmm1, %xmm4 2195; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2196; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2197; SSSE3-NEXT: movdqa %xmm1, 112(%rdi) 2198; SSSE3-NEXT: movdqa %xmm4, 96(%rdi) 2199; SSSE3-NEXT: movdqa %xmm6, 80(%rdi) 2200; SSSE3-NEXT: movdqa %xmm7, 64(%rdi) 2201; SSSE3-NEXT: movdqa %xmm0, 48(%rdi) 2202; 
SSSE3-NEXT: movdqa %xmm5, 32(%rdi) 2203; SSSE3-NEXT: movdqa %xmm3, 16(%rdi) 2204; SSSE3-NEXT: movdqa %xmm8, (%rdi) 2205; SSSE3-NEXT: retq 2206; 2207; SSE41-LABEL: zext_32i8_to_32i32: 2208; SSE41: # %bb.0: 2209; SSE41-NEXT: movq %rdi, %rax 2210; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2211; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] 2212; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 2213; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] 2214; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero 2215; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 2216; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2217; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2218; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,1,1] 2219; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero 2220; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] 2221; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero 2222; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 2223; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2224; SSE41-NEXT: movdqa %xmm1, 112(%rdi) 2225; SSE41-NEXT: movdqa %xmm7, 96(%rdi) 2226; SSE41-NEXT: movdqa %xmm6, 80(%rdi) 2227; SSE41-NEXT: movdqa %xmm5, 64(%rdi) 2228; SSE41-NEXT: movdqa %xmm0, 48(%rdi) 2229; SSE41-NEXT: movdqa %xmm4, 32(%rdi) 2230; SSE41-NEXT: movdqa %xmm3, 16(%rdi) 2231; SSE41-NEXT: movdqa %xmm2, (%rdi) 2232; SSE41-NEXT: retq 
2233; 2234; AVX1-LABEL: zext_32i8_to_32i32: 2235; AVX1: # %bb.0: 2236; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2237; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 2238; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 2239; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 2240; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 2241; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 2242; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1] 2243; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 2244; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 2245; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2246; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2247; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 2248; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2249; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 2250; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 2251; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2252; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2253; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero 2254; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3 2255; AVX1-NEXT: vmovaps %ymm4, %ymm0 2256; AVX1-NEXT: retq 2257; 2258; AVX2-LABEL: zext_32i8_to_32i32: 2259; AVX2: # %bb.0: 2260; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2261; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 2262; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero 2263; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2264; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2265; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 2266; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2267; AVX2-NEXT: vmovdqa %ymm4, %ymm0 2268; AVX2-NEXT: retq 2269; 2270; AVX512-LABEL: zext_32i8_to_32i32: 2271; AVX512: # %bb.0: 2272; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2273; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2274; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2275; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0 2276; AVX512-NEXT: retq 2277 %res = zext <32 x i8>%x to <32 x i32> 2278 ret <32 x i32> %res 2279} 2280 2281define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) { 2282; SSE2-LABEL: zext_2i8_to_2i32: 2283; SSE2: # %bb.0: 2284; SSE2-NEXT: movzwl (%rdi), %eax 2285; SSE2-NEXT: movd %eax, %xmm0 2286; SSE2-NEXT: pxor %xmm1, %xmm1 2287; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2288; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2289; SSE2-NEXT: paddd %xmm0, %xmm0 2290; SSE2-NEXT: retq 2291; 2292; SSSE3-LABEL: zext_2i8_to_2i32: 2293; SSSE3: # %bb.0: 2294; SSSE3-NEXT: movzwl (%rdi), %eax 2295; SSSE3-NEXT: movd %eax, %xmm0 2296; SSSE3-NEXT: pxor %xmm1, %xmm1 2297; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2298; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2299; SSSE3-NEXT: paddd %xmm0, %xmm0 2300; SSSE3-NEXT: retq 2301; 2302; SSE41-LABEL: zext_2i8_to_2i32: 2303; SSE41: # %bb.0: 2304; SSE41-NEXT: movzwl (%rdi), %eax 2305; SSE41-NEXT: movd %eax, %xmm0 2306; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2307; SSE41-NEXT: paddd %xmm0, %xmm0 2308; SSE41-NEXT: retq 2309; 2310; AVX-LABEL: zext_2i8_to_2i32: 2311; AVX: # %bb.0: 2312; AVX-NEXT: 
movzwl (%rdi), %eax 2313; AVX-NEXT: vmovd %eax, %xmm0 2314; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2315; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 2316; AVX-NEXT: retq 2317 %x = load <2 x i8>, <2 x i8>* %addr, align 1 2318 %y = zext <2 x i8> %x to <2 x i32> 2319 %z = add <2 x i32>%y, %y 2320 ret <2 x i32>%z 2321} 2322 2323define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) { 2324; SSE2-LABEL: zext_4i17_to_4i32: 2325; SSE2: # %bb.0: 2326; SSE2-NEXT: movq (%rdi), %rax 2327; SSE2-NEXT: movd %eax, %xmm0 2328; SSE2-NEXT: movq %rax, %rcx 2329; SSE2-NEXT: shrq $17, %rcx 2330; SSE2-NEXT: movd %ecx, %xmm1 2331; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2332; SSE2-NEXT: movl 8(%rdi), %ecx 2333; SSE2-NEXT: shll $13, %ecx 2334; SSE2-NEXT: movq %rax, %rdx 2335; SSE2-NEXT: shrq $51, %rdx 2336; SSE2-NEXT: orl %ecx, %edx 2337; SSE2-NEXT: movd %edx, %xmm1 2338; SSE2-NEXT: shrq $34, %rax 2339; SSE2-NEXT: movd %eax, %xmm2 2340; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2341; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2342; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 2343; SSE2-NEXT: retq 2344; 2345; SSSE3-LABEL: zext_4i17_to_4i32: 2346; SSSE3: # %bb.0: 2347; SSSE3-NEXT: movq (%rdi), %rax 2348; SSSE3-NEXT: movd %eax, %xmm0 2349; SSSE3-NEXT: movq %rax, %rcx 2350; SSSE3-NEXT: shrq $17, %rcx 2351; SSSE3-NEXT: movd %ecx, %xmm1 2352; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2353; SSSE3-NEXT: movl 8(%rdi), %ecx 2354; SSSE3-NEXT: shll $13, %ecx 2355; SSSE3-NEXT: movq %rax, %rdx 2356; SSSE3-NEXT: shrq $51, %rdx 2357; SSSE3-NEXT: orl %ecx, %edx 2358; SSSE3-NEXT: movd %edx, %xmm1 2359; SSSE3-NEXT: shrq $34, %rax 2360; SSSE3-NEXT: movd %eax, %xmm2 2361; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2362; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2363; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 
2364; SSSE3-NEXT: retq 2365; 2366; SSE41-LABEL: zext_4i17_to_4i32: 2367; SSE41: # %bb.0: 2368; SSE41-NEXT: movl 8(%rdi), %eax 2369; SSE41-NEXT: shll $13, %eax 2370; SSE41-NEXT: movq (%rdi), %rcx 2371; SSE41-NEXT: movq %rcx, %rdx 2372; SSE41-NEXT: shrq $51, %rdx 2373; SSE41-NEXT: orl %eax, %edx 2374; SSE41-NEXT: movq %rcx, %rax 2375; SSE41-NEXT: shrq $17, %rax 2376; SSE41-NEXT: movd %ecx, %xmm0 2377; SSE41-NEXT: pinsrd $1, %eax, %xmm0 2378; SSE41-NEXT: shrq $34, %rcx 2379; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 2380; SSE41-NEXT: pinsrd $3, %edx, %xmm0 2381; SSE41-NEXT: pand {{.*}}(%rip), %xmm0 2382; SSE41-NEXT: retq 2383; 2384; AVX1-LABEL: zext_4i17_to_4i32: 2385; AVX1: # %bb.0: 2386; AVX1-NEXT: movl 8(%rdi), %eax 2387; AVX1-NEXT: shll $13, %eax 2388; AVX1-NEXT: movq (%rdi), %rcx 2389; AVX1-NEXT: movq %rcx, %rdx 2390; AVX1-NEXT: shrq $51, %rdx 2391; AVX1-NEXT: orl %eax, %edx 2392; AVX1-NEXT: movq %rcx, %rax 2393; AVX1-NEXT: shrq $17, %rax 2394; AVX1-NEXT: vmovd %ecx, %xmm0 2395; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2396; AVX1-NEXT: shrq $34, %rcx 2397; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2398; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 2399; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 2400; AVX1-NEXT: retq 2401; 2402; AVX2-LABEL: zext_4i17_to_4i32: 2403; AVX2: # %bb.0: 2404; AVX2-NEXT: movl 8(%rdi), %eax 2405; AVX2-NEXT: shll $13, %eax 2406; AVX2-NEXT: movq (%rdi), %rcx 2407; AVX2-NEXT: movq %rcx, %rdx 2408; AVX2-NEXT: shrq $51, %rdx 2409; AVX2-NEXT: orl %eax, %edx 2410; AVX2-NEXT: movq %rcx, %rax 2411; AVX2-NEXT: shrq $17, %rax 2412; AVX2-NEXT: vmovd %ecx, %xmm0 2413; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2414; AVX2-NEXT: shrq $34, %rcx 2415; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2416; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 2417; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071] 2418; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2419; AVX2-NEXT: retq 2420; 2421; AVX512-LABEL: zext_4i17_to_4i32: 2422; AVX512: # %bb.0: 2423; AVX512-NEXT: 
; NOTE(review): The CHECK lines below are autogenerated by
; utils/update_llc_test_checks.py (see file header); do not hand-edit them —
; regenerate with the script after any codegen change.
; This region contains (in order):
;  * the tail of a <4 x i17> load+zext-to-<4 x i32> test whose definition
;    begins above this chunk;
;  * @zext_8i6_to_8i64 — truncates the i32 arg to i6, splats it to <8 x i6>,
;    adds <0..7>, and zero-extends to <8 x i64>; checks the masking with 63
;    (pand / vpandq) that implements the i6 zext per target;
;  * @splatshuf_zext_v4i64 — splat-shuffle lane 0 of <4 x i32> then zext to
;    <4 x i64>; checks the shuffle+zext folds to pshufd/pmovzxdq or broadcast;
;  * @splatshuf_zext_v8i32_matching_undefs — <8 x i16> shuffle with mask
;    <0,undef,3,7,0,undef,3,7> (undefs in matching positions) then zext to
;    <8 x i32>;
;  * @splatshuf_zext_v8i32_unmatched_undef — same shape but mask
;    <0,1,3,7,0,undef,3,7> (undef only in the high half), then zext;
;  * @splatshuf_zext_v16i16 — splat lane 14 of <16 x i8> then zext to
;    <16 x i16>; checks the splat+zext lowers to pshufb/pmovzxbw per target.
 movl 8(%rdi), %eax 2424; AVX512-NEXT: shll $13, %eax 2425; AVX512-NEXT: movq (%rdi), %rcx 2426; AVX512-NEXT: movq %rcx, %rdx 2427; AVX512-NEXT: shrq $51, %rdx 2428; AVX512-NEXT: orl %eax, %edx 2429; AVX512-NEXT: movq %rcx, %rax 2430; AVX512-NEXT: shrq $17, %rax 2431; AVX512-NEXT: vmovd %ecx, %xmm0 2432; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2433; AVX512-NEXT: shrq $34, %rcx 2434; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2435; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 2436; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071] 2437; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 2438; AVX512-NEXT: retq 2439 %a = load <4 x i17>, <4 x i17>* %ptr 2440 %b = zext <4 x i17> %a to <4 x i32> 2441 ret <4 x i32> %b 2442} 2443 2444define <8 x i64> @zext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp { 2445; SSE2-LABEL: zext_8i6_to_8i64: 2446; SSE2: # %bb.0: # %entry 2447; SSE2-NEXT: movd %edi, %xmm0 2448; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2449; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 2450; SSE2-NEXT: paddw {{.*}}(%rip), %xmm3 2451; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 2452; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2453; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,63] 2454; SSE2-NEXT: pand %xmm4, %xmm0 2455; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 2456; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 2457; SSE2-NEXT: pand %xmm4, %xmm1 2458; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 2459; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 2460; SSE2-NEXT: pand %xmm4, %xmm2 2461; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2462; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 2463; SSE2-NEXT: pand %xmm4, %xmm3 2464; SSE2-NEXT: retq 2465; 2466; SSSE3-LABEL: zext_8i6_to_8i64: 2467; SSSE3: # %bb.0: # %entry 2468; SSSE3-NEXT: movd %edi, %xmm0 2469; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2470; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 2471; 
SSSE3-NEXT: paddw {{.*}}(%rip), %xmm3 2472; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0] 2473; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] 2474; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [63,63] 2475; SSSE3-NEXT: pand %xmm4, %xmm0 2476; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 2477; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5] 2478; SSSE3-NEXT: pand %xmm4, %xmm1 2479; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2] 2480; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5] 2481; SSSE3-NEXT: pand %xmm4, %xmm2 2482; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2483; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5] 2484; SSSE3-NEXT: pand %xmm4, %xmm3 2485; SSSE3-NEXT: retq 2486; 2487; SSE41-LABEL: zext_8i6_to_8i64: 2488; SSE41: # %bb.0: # %entry 2489; SSE41-NEXT: movd %edi, %xmm0 2490; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2491; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0] 2492; SSE41-NEXT: paddw {{.*}}(%rip), %xmm3 2493; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 2494; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [63,63] 2495; SSE41-NEXT: pand %xmm4, %xmm0 2496; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1] 2497; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 2498; SSE41-NEXT: pand %xmm4, %xmm1 2499; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] 2500; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 2501; SSE41-NEXT: pand %xmm4, %xmm2 2502; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3] 2503; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero 2504; SSE41-NEXT: pand %xmm4, %xmm3 2505; SSE41-NEXT: retq 2506; 2507; AVX1-LABEL: zext_8i6_to_8i64: 2508; AVX1: # %bb.0: # %entry 2509; AVX1-NEXT: vmovd %edi, %xmm0 2510; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2511; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2512; AVX1-NEXT: vpaddw 
{{.*}}(%rip), %xmm0, %xmm0 2513; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 2514; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 2515; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] 2516; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 2517; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2518; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 2519; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 2520; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] 2521; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 2522; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2523; AVX1-NEXT: retq 2524; 2525; AVX2-LABEL: zext_8i6_to_8i64: 2526; AVX2: # %bb.0: # %entry 2527; AVX2-NEXT: vmovd %edi, %xmm0 2528; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 2529; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 2530; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 2531; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2532; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 2533; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 2534; AVX2-NEXT: retq 2535; 2536; AVX512-LABEL: zext_8i6_to_8i64: 2537; AVX512: # %bb.0: # %entry 2538; AVX512-NEXT: vmovd %edi, %xmm0 2539; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 2540; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 2541; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2542; AVX512-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0 2543; AVX512-NEXT: retq 2544entry: 2545 %a = trunc i32 %x to i6 2546 %b = insertelement <8 x i6> undef, i6 %a, i32 0 2547 %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x 
i32> zeroinitializer 2548 %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7> 2549 %e = zext <8 x i6> %d to <8 x i64> 2550 ret <8 x i64> %e 2551} 2552 2553define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) { 2554; SSE2-LABEL: splatshuf_zext_v4i64: 2555; SSE2: # %bb.0: 2556; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2557; SSE2-NEXT: pxor %xmm1, %xmm1 2558; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2559; SSE2-NEXT: movdqa %xmm0, %xmm1 2560; SSE2-NEXT: retq 2561; 2562; SSSE3-LABEL: splatshuf_zext_v4i64: 2563; SSSE3: # %bb.0: 2564; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2565; SSSE3-NEXT: pxor %xmm1, %xmm1 2566; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2567; SSSE3-NEXT: movdqa %xmm0, %xmm1 2568; SSSE3-NEXT: retq 2569; 2570; SSE41-LABEL: splatshuf_zext_v4i64: 2571; SSE41: # %bb.0: 2572; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2573; SSE41-NEXT: pxor %xmm2, %xmm2 2574; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 2575; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2576; SSE41-NEXT: retq 2577; 2578; AVX1-LABEL: splatshuf_zext_v4i64: 2579; AVX1: # %bb.0: 2580; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2581; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 2582; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2583; AVX1-NEXT: retq 2584; 2585; AVX2-LABEL: splatshuf_zext_v4i64: 2586; AVX2: # %bb.0: 2587; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 2588; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2589; AVX2-NEXT: retq 2590; 2591; AVX512-LABEL: splatshuf_zext_v4i64: 2592; AVX512: # %bb.0: 2593; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0 2594; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2595; AVX512-NEXT: retq 2596 %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer 2597 %ext = zext <4 x i32> %shuf to <4 x i64> 
2598 ret <4 x i64> %ext 2599} 2600 2601define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) { 2602; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs: 2603; SSE2: # %bb.0: 2604; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 2605; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7] 2606; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 2607; SSE2-NEXT: movdqa %xmm0, %xmm1 2608; SSE2-NEXT: retq 2609; 2610; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs: 2611; SSSE3: # %bb.0: 2612; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2613; SSSE3-NEXT: movdqa %xmm0, %xmm1 2614; SSSE3-NEXT: retq 2615; 2616; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs: 2617; SSE41: # %bb.0: 2618; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2619; SSE41-NEXT: movdqa %xmm0, %xmm1 2620; SSE41-NEXT: retq 2621; 2622; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs: 2623; AVX1: # %bb.0: 2624; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[6,7],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2625; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2626; AVX1-NEXT: retq 2627; 2628; AVX2-LABEL: splatshuf_zext_v8i32_matching_undefs: 2629; AVX2: # %bb.0: 2630; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15] 2631; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2632; AVX2-NEXT: retq 2633; 2634; AVX512-LABEL: splatshuf_zext_v8i32_matching_undefs: 2635; AVX512: # %bb.0: 2636; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15] 2637; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2638; AVX512-NEXT: retq 2639 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 3, 
i32 7, i32 0, i32 undef, i32 3, i32 7> 2640 %ext = zext <8 x i16> %shuf to <8 x i32> 2641 ret <8 x i32> %ext 2642} 2643 2644define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) { 2645; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2646; SSE2: # %bb.0: 2647; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2648; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7] 2649; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2650; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7] 2651; SSE2-NEXT: pxor %xmm1, %xmm1 2652; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2653; SSE2-NEXT: movdqa %xmm0, %xmm1 2654; SSE2-NEXT: retq 2655; 2656; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2657; SSSE3: # %bb.0: 2658; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2659; SSSE3-NEXT: movdqa %xmm0, %xmm1 2660; SSSE3-NEXT: retq 2661; 2662; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2663; SSE41: # %bb.0: 2664; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero 2665; SSE41-NEXT: movdqa %xmm0, %xmm1 2666; SSE41-NEXT: retq 2667; 2668; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2669; AVX1: # %bb.0: 2670; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] 2671; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2672; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2673; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2674; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2675; AVX1-NEXT: retq 2676; 2677; AVX2-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2678; AVX2: # %bb.0: 2679; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] 2680; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2681; AVX2-NEXT: retq 2682; 2683; AVX512-LABEL: splatshuf_zext_v8i32_unmatched_undef: 2684; AVX512: # %bb.0: 2685; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15] 2686; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2687; AVX512-NEXT: retq 2688 %shuf = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 7, i32 0, i32 undef, i32 3, i32 7> 2689 %ext = zext <8 x i16> %shuf to <8 x i32> 2690 ret <8 x i32> %ext 2691} 2692 2693define <16 x i16> @splatshuf_zext_v16i16(<16 x i8> %x) { 2694; SSE2-LABEL: splatshuf_zext_v16i16: 2695; SSE2: # %bb.0: 2696; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2697; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6] 2698; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2] 2699; SSE2-NEXT: pxor %xmm1, %xmm1 2700; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2701; SSE2-NEXT: movdqa %xmm0, %xmm1 2702; SSE2-NEXT: retq 2703; 2704; SSSE3-LABEL: splatshuf_zext_v16i16: 2705; SSSE3: # %bb.0: 2706; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero 2707; SSSE3-NEXT: movdqa %xmm0, %xmm1 2708; SSSE3-NEXT: retq 2709; 2710; SSE41-LABEL: splatshuf_zext_v16i16: 2711; SSE41: # %bb.0: 2712; SSE41-NEXT: movdqa %xmm0, %xmm1 2713; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] 2714; SSE41-NEXT: pxor %xmm2, %xmm2 2715; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2716; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 2717; SSE41-NEXT: retq 2718; 2719; AVX1-LABEL: splatshuf_zext_v16i16: 2720; AVX1: # %bb.0: 2721; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero,xmm0[14],zero 2722; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2723; AVX1-NEXT: retq 2724; 2725; AVX2-LABEL: splatshuf_zext_v16i16: 2726; AVX2: # %bb.0: 2727; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] 2728; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2729; AVX2-NEXT: retq 2730; 2731; AVX512-LABEL: splatshuf_zext_v16i16: 2732; AVX512: # %bb.0: 2733; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] 2734; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 2735; AVX512-NEXT: retq 2736 %shuf = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14> 2737 %ext = zext <16 x i8> %shuf to <16 x i16> 2738 ret <16 x i16> %ext 2739} 2740