1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 7 8define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 9; SSE2-LABEL: zext_16i8_to_8i16: 10; SSE2: # BB#0: # %entry 11; SSE2-NEXT: pxor %xmm1, %xmm1 12; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 13; SSE2-NEXT: retq 14; 15; SSSE3-LABEL: zext_16i8_to_8i16: 16; SSSE3: # BB#0: # %entry 17; SSSE3-NEXT: pxor %xmm1, %xmm1 18; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 19; SSSE3-NEXT: retq 20; 21; SSE41-LABEL: zext_16i8_to_8i16: 22; SSE41: # BB#0: # %entry 23; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 24; SSE41-NEXT: retq 25; 26; AVX-LABEL: zext_16i8_to_8i16: 27; AVX: # BB#0: # %entry 28; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 29; AVX-NEXT: retq 30entry: 31 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 32 %C = zext <8 x i8> %B to <8 x i16> 33 ret <8 x i16> %C 34} 35 36; PR17654 37define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) { 38; SSE2-LABEL: zext_16i8_to_16i16: 39; SSE2: # BB#0: # %entry 40; SSE2-NEXT: movdqa %xmm0, %xmm1 41; SSE2-NEXT: pxor %xmm2, %xmm2 42; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 43; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 44; SSE2-NEXT: retq 45; 46; SSSE3-LABEL: zext_16i8_to_16i16: 47; SSSE3: # BB#0: # %entry 48; SSSE3-NEXT: movdqa %xmm0, %xmm1 49; SSSE3-NEXT: pxor %xmm2, %xmm2 50; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 51; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 52; SSSE3-NEXT: retq 53; 54; SSE41-LABEL: zext_16i8_to_16i16: 55; SSE41: # BB#0: # %entry 56; SSE41-NEXT: movdqa %xmm0, %xmm1 57; SSE41-NEXT: pxor %xmm2, %xmm2 58; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 59; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 60; SSE41-NEXT: retq 61; 62; 
AVX1-LABEL: zext_16i8_to_16i16: 63; AVX1: # BB#0: # %entry 64; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 65; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 66; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 67; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 68; AVX1-NEXT: retq 69; 70; AVX2-LABEL: zext_16i8_to_16i16: 71; AVX2: # BB#0: # %entry 72; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 73; AVX2-NEXT: retq 74entry: 75 %B = zext <16 x i8> %A to <16 x i16> 76 ret <16 x i16> %B 77} 78 79define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 80; SSE2-LABEL: zext_16i8_to_4i32: 81; SSE2: # BB#0: # %entry 82; SSE2-NEXT: pxor %xmm1, %xmm1 83; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 84; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 85; SSE2-NEXT: retq 86; 87; SSSE3-LABEL: zext_16i8_to_4i32: 88; SSSE3: # BB#0: # %entry 89; SSSE3-NEXT: pxor %xmm1, %xmm1 90; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 91; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 92; SSSE3-NEXT: retq 93; 94; SSE41-LABEL: zext_16i8_to_4i32: 95; SSE41: # BB#0: # %entry 96; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 97; SSE41-NEXT: retq 98; 99; AVX-LABEL: zext_16i8_to_4i32: 100; AVX: # BB#0: # %entry 101; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 102; AVX-NEXT: retq 103entry: 104 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 105 %C = zext <4 x i8> %B to <4 x i32> 106 ret <4 x i32> %C 107} 108 109define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 110; SSE2-LABEL: zext_16i8_to_8i32: 111; SSE2: # BB#0: # %entry 112; SSE2-NEXT: movdqa %xmm0, %xmm1 113; SSE2-NEXT: pxor %xmm2, %xmm2 114; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 115; SSE2-NEXT: movdqa %xmm1, %xmm0 116; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 117; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 118; SSE2-NEXT: retq 119; 120; SSSE3-LABEL: zext_16i8_to_8i32: 121; SSSE3: # BB#0: # %entry 122; SSSE3-NEXT: movdqa %xmm0, %xmm1 123; SSSE3-NEXT: pxor %xmm2, %xmm2 124; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 125; SSSE3-NEXT: movdqa %xmm1, %xmm0 126; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 127; SSSE3-NEXT: punpckhwd {{.*#+}} 
xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 128; SSSE3-NEXT: retq 129; 130; SSE41-LABEL: zext_16i8_to_8i32: 131; SSE41: # BB#0: # %entry 132; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 133; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 134; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 135; SSE41-NEXT: movdqa %xmm2, %xmm0 136; SSE41-NEXT: retq 137; 138; AVX1-LABEL: zext_16i8_to_8i32: 139; AVX1: # BB#0: # %entry 140; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 141; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 142; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 143; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 144; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 145; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 146; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 147; AVX1-NEXT: retq 148; 149; AVX2-LABEL: zext_16i8_to_8i32: 150; AVX2: # BB#0: # %entry 151; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 152; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 153; AVX2-NEXT: retq 154entry: 155 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 156 %C = zext <8 x i8> %B to <8 x i32> 157 ret <8 x i32> %C 158} 159 160define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 161; SSE2-LABEL: zext_16i8_to_2i64: 162; SSE2: # BB#0: # %entry 163; SSE2-NEXT: pxor %xmm1, %xmm1 164; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 165; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 166; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 167; SSE2-NEXT: retq 168; 169; SSSE3-LABEL: zext_16i8_to_2i64: 170; SSSE3: # BB#0: # %entry 171; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 172; SSSE3-NEXT: retq 173; 174; SSE41-LABEL: zext_16i8_to_2i64: 175; SSE41: # BB#0: # %entry 176; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 177; SSE41-NEXT: retq 178; 179; AVX-LABEL: zext_16i8_to_2i64: 180; AVX: # BB#0: # %entry 181; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 182; AVX-NEXT: retq 183entry: 184 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 185 %C = zext <2 x i8> %B to <2 x i64> 186 ret <2 x i64> %C 187} 188 189define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 190; SSE2-LABEL: zext_16i8_to_4i64: 191; SSE2: # BB#0: # %entry 192; SSE2-NEXT: movdqa %xmm0, %xmm1 193; SSE2-NEXT: pxor %xmm2, %xmm2 194; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 195; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 196; SSE2-NEXT: movdqa %xmm1, %xmm0 197; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 198; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 199; SSE2-NEXT: retq 200; 201; SSSE3-LABEL: zext_16i8_to_4i64: 202; SSSE3: # BB#0: # %entry 203; SSSE3-NEXT: movdqa %xmm0, %xmm1 204; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 205; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 206; SSSE3-NEXT: retq 207; 208; SSE41-LABEL: zext_16i8_to_4i64: 209; SSE41: # BB#0: # %entry 210; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 211; SSE41-NEXT: psrld $16, %xmm0 212; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 213; SSE41-NEXT: movdqa %xmm2, %xmm0 214; SSE41-NEXT: retq 215; 216; AVX1-LABEL: zext_16i8_to_4i64: 217; AVX1: # BB#0: # %entry 218; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 219; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 220; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 221; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 222; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 223; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 224; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 225; AVX1-NEXT: retq 226; 227; AVX2-LABEL: zext_16i8_to_4i64: 228; AVX2: # BB#0: # %entry 229; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 230; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 231; AVX2-NEXT: retq 232entry: 233 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 234 %C = zext <4 x i8> %B to <4 x i64> 235 ret <4 x i64> %C 236} 237 238define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 239; SSE2-LABEL: zext_8i16_to_4i32: 240; SSE2: # BB#0: # %entry 241; SSE2-NEXT: pxor %xmm1, %xmm1 242; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 243; SSE2-NEXT: retq 244; 245; SSSE3-LABEL: zext_8i16_to_4i32: 246; SSSE3: # BB#0: # %entry 247; SSSE3-NEXT: pxor %xmm1, %xmm1 248; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 249; SSSE3-NEXT: retq 250; 251; SSE41-LABEL: zext_8i16_to_4i32: 252; SSE41: # BB#0: # %entry 253; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 254; SSE41-NEXT: retq 255; 256; AVX-LABEL: zext_8i16_to_4i32: 257; AVX: # BB#0: # %entry 258; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 259; AVX-NEXT: retq 260entry: 261 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 262 %C = zext <4 x i16> %B to <4 x i32> 263 ret <4 x i32> %C 264} 265 266define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp 
{ 267; SSE2-LABEL: zext_8i16_to_8i32: 268; SSE2: # BB#0: # %entry 269; SSE2-NEXT: movdqa %xmm0, %xmm1 270; SSE2-NEXT: pxor %xmm2, %xmm2 271; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 272; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 273; SSE2-NEXT: retq 274; 275; SSSE3-LABEL: zext_8i16_to_8i32: 276; SSSE3: # BB#0: # %entry 277; SSSE3-NEXT: movdqa %xmm0, %xmm1 278; SSSE3-NEXT: pxor %xmm2, %xmm2 279; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 280; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 281; SSSE3-NEXT: retq 282; 283; SSE41-LABEL: zext_8i16_to_8i32: 284; SSE41: # BB#0: # %entry 285; SSE41-NEXT: movdqa %xmm0, %xmm1 286; SSE41-NEXT: pxor %xmm2, %xmm2 287; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 288; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 289; SSE41-NEXT: retq 290; 291; AVX1-LABEL: zext_8i16_to_8i32: 292; AVX1: # BB#0: # %entry 293; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 294; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 295; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 296; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 297; AVX1-NEXT: retq 298; 299; AVX2-LABEL: zext_8i16_to_8i32: 300; AVX2: # BB#0: # %entry 301; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 302; AVX2-NEXT: retq 303entry: 304 %B = zext <8 x i16> %A to <8 x i32> 305 ret <8 x i32>%B 306} 307 308define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 309; SSE2-LABEL: zext_8i16_to_2i64: 310; SSE2: # BB#0: # %entry 311; SSE2-NEXT: pxor %xmm1, %xmm1 312; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 313; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 314; SSE2-NEXT: retq 315; 316; SSSE3-LABEL: zext_8i16_to_2i64: 317; SSSE3: # BB#0: # %entry 318; SSSE3-NEXT: pxor %xmm1, %xmm1 319; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 320; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 321; SSSE3-NEXT: retq 322; 323; SSE41-LABEL: zext_8i16_to_2i64: 324; SSE41: # BB#0: # %entry 325; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 326; SSE41-NEXT: retq 327; 328; AVX-LABEL: zext_8i16_to_2i64: 329; AVX: # BB#0: # %entry 330; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 331; AVX-NEXT: retq 332entry: 333 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 334 %C = zext <2 x i16> %B to <2 x i64> 335 ret <2 x i64> %C 336} 337 338define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 339; SSE2-LABEL: zext_8i16_to_4i64: 340; SSE2: # BB#0: # %entry 341; SSE2-NEXT: movdqa %xmm0, %xmm1 342; SSE2-NEXT: pxor %xmm2, %xmm2 343; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 344; SSE2-NEXT: movdqa %xmm1, %xmm0 345; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 346; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 347; SSE2-NEXT: retq 
348; 349; SSSE3-LABEL: zext_8i16_to_4i64: 350; SSSE3: # BB#0: # %entry 351; SSSE3-NEXT: movdqa %xmm0, %xmm1 352; SSSE3-NEXT: pxor %xmm2, %xmm2 353; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 354; SSSE3-NEXT: movdqa %xmm1, %xmm0 355; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 356; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 357; SSSE3-NEXT: retq 358; 359; SSE41-LABEL: zext_8i16_to_4i64: 360; SSE41: # BB#0: # %entry 361; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 362; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 363; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 364; SSE41-NEXT: movdqa %xmm2, %xmm0 365; SSE41-NEXT: retq 366; 367; AVX1-LABEL: zext_8i16_to_4i64: 368; AVX1: # BB#0: # %entry 369; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 370; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 371; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 372; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] 373; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 374; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] 375; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 376; AVX1-NEXT: retq 377; 378; AVX2-LABEL: zext_8i16_to_4i64: 379; AVX2: # BB#0: # %entry 380; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 381; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 382; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15] 383; AVX2-NEXT: retq 384entry: 385 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 386 %C = zext <4 x i16> %B to <4 x i64> 387 ret <4 x i64> %C 388} 389 390define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 391; SSE2-LABEL: zext_4i32_to_2i64: 392; SSE2: # BB#0: # %entry 393; SSE2-NEXT: pxor %xmm1, %xmm1 394; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 395; SSE2-NEXT: retq 396; 397; SSSE3-LABEL: zext_4i32_to_2i64: 398; SSSE3: # BB#0: # %entry 399; SSSE3-NEXT: pxor %xmm1, %xmm1 400; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 401; SSSE3-NEXT: retq 402; 403; SSE41-LABEL: zext_4i32_to_2i64: 404; SSE41: # BB#0: # %entry 405; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 406; SSE41-NEXT: retq 407; 408; AVX-LABEL: zext_4i32_to_2i64: 409; AVX: # BB#0: # %entry 410; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 411; AVX-NEXT: retq 412entry: 413 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 414 %C = zext <2 x i32> %B to <2 x i64> 415 ret <2 x i64> %C 416} 417 418define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 419; SSE2-LABEL: zext_4i32_to_4i64: 420; SSE2: # BB#0: # %entry 421; SSE2-NEXT: movdqa %xmm0, %xmm1 422; SSE2-NEXT: pxor %xmm2, %xmm2 423; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 424; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 425; SSE2-NEXT: retq 426; 427; SSSE3-LABEL: zext_4i32_to_4i64: 428; SSSE3: # BB#0: # %entry 429; SSSE3-NEXT: movdqa %xmm0, %xmm1 430; SSSE3-NEXT: pxor %xmm2, %xmm2 431; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 432; SSSE3-NEXT: punpckhdq 
{{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 433; SSSE3-NEXT: retq 434; 435; SSE41-LABEL: zext_4i32_to_4i64: 436; SSE41: # BB#0: # %entry 437; SSE41-NEXT: movdqa %xmm0, %xmm1 438; SSE41-NEXT: pxor %xmm2, %xmm2 439; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 440; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 441; SSE41-NEXT: retq 442; 443; AVX1-LABEL: zext_4i32_to_4i64: 444; AVX1: # BB#0: # %entry 445; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 446; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 447; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 448; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 449; AVX1-NEXT: retq 450; 451; AVX2-LABEL: zext_4i32_to_4i64: 452; AVX2: # BB#0: # %entry 453; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 454; AVX2-NEXT: retq 455entry: 456 %B = zext <4 x i32> %A to <4 x i64> 457 ret <4 x i64>%B 458} 459 460define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) { 461; SSE2-LABEL: load_zext_2i8_to_2i64: 462; SSE2: # BB#0: # %entry 463; SSE2-NEXT: movzwl (%rdi), %eax 464; SSE2-NEXT: movd %eax, %xmm0 465; SSE2-NEXT: pxor %xmm1, %xmm1 466; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 467; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 468; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 469; SSE2-NEXT: retq 470; 471; SSSE3-LABEL: load_zext_2i8_to_2i64: 472; SSSE3: # BB#0: # %entry 473; SSSE3-NEXT: movzwl (%rdi), %eax 474; SSSE3-NEXT: movd %eax, %xmm0 475; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 476; SSSE3-NEXT: retq 477; 478; SSE41-LABEL: load_zext_2i8_to_2i64: 479; SSE41: # BB#0: # %entry 480; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 481; SSE41-NEXT: retq 482; 483; AVX-LABEL: load_zext_2i8_to_2i64: 484; AVX: # BB#0: # %entry 485; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 486; AVX-NEXT: retq 487entry: 488 %X = load <2 x i8>, <2 x i8>* %ptr 489 %Y = zext <2 x i8> %X to <2 x i64> 490 ret <2 x i64> %Y 491} 492 493define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) { 494; SSE2-LABEL: load_zext_4i8_to_4i32: 495; SSE2: # BB#0: # %entry 496; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 497; SSE2-NEXT: pxor %xmm1, %xmm1 498; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 499; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 500; SSE2-NEXT: retq 501; 502; SSSE3-LABEL: load_zext_4i8_to_4i32: 503; SSSE3: # BB#0: # %entry 504; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 505; SSSE3-NEXT: pxor %xmm1, %xmm1 506; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 507; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 508; SSSE3-NEXT: retq 509; 510; SSE41-LABEL: load_zext_4i8_to_4i32: 511; SSE41: # BB#0: # %entry 512; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 513; SSE41-NEXT: retq 514; 515; AVX-LABEL: load_zext_4i8_to_4i32: 516; AVX: # BB#0: # %entry 517; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 518; AVX-NEXT: retq 519entry: 520 %X = load <4 x i8>, <4 x i8>* %ptr 521 %Y = zext <4 x i8> %X to <4 x i32> 522 ret <4 x i32> %Y 523} 524 525define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) { 526; SSE2-LABEL: load_zext_4i8_to_4i64: 527; SSE2: # BB#0: # %entry 528; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 529; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 530; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 531; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 532; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 533; SSE2-NEXT: pand %xmm2, %xmm0 534; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 535; SSE2-NEXT: pand %xmm2, %xmm1 536; SSE2-NEXT: retq 537; 538; SSSE3-LABEL: load_zext_4i8_to_4i64: 539; SSSE3: # BB#0: # %entry 540; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 541; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 542; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 543; SSSE3-NEXT: movdqa %xmm1, %xmm0 544; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero 545; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,zero,zero,zero,zero,xmm1[12],zero,zero,zero,zero,zero,zero,zero 546; SSSE3-NEXT: retq 547; 548; SSE41-LABEL: load_zext_4i8_to_4i64: 549; SSE41: # BB#0: # %entry 550; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 551; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 552; SSE41-NEXT: retq 553; 554; AVX1-LABEL: load_zext_4i8_to_4i64: 555; AVX1: # BB#0: # %entry 556; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 557; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 558; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 559; AVX1-NEXT: retq 560; 561; AVX2-LABEL: load_zext_4i8_to_4i64: 562; AVX2: # BB#0: # %entry 563; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 564; AVX2-NEXT: retq 565entry: 566 %X = load <4 x i8>, <4 x i8>* %ptr 567 %Y = zext <4 x i8> %X to <4 x i64> 568 ret <4 x i64> %Y 569} 570 571define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) { 572; SSE2-LABEL: load_zext_8i8_to_8i16: 573; SSE2: # BB#0: # %entry 574; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 575; SSE2-NEXT: pxor %xmm1, %xmm1 576; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 577; SSE2-NEXT: retq 578; 579; SSSE3-LABEL: load_zext_8i8_to_8i16: 580; SSSE3: # BB#0: # 
%entry 581; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 582; SSSE3-NEXT: pxor %xmm1, %xmm1 583; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 584; SSSE3-NEXT: retq 585; 586; SSE41-LABEL: load_zext_8i8_to_8i16: 587; SSE41: # BB#0: # %entry 588; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 589; SSE41-NEXT: retq 590; 591; AVX-LABEL: load_zext_8i8_to_8i16: 592; AVX: # BB#0: # %entry 593; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 594; AVX-NEXT: retq 595entry: 596 %X = load <8 x i8>, <8 x i8>* %ptr 597 %Y = zext <8 x i8> %X to <8 x i16> 598 ret <8 x i16> %Y 599} 600 601define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) { 602; SSE2-LABEL: load_zext_8i8_to_8i32: 603; SSE2: # BB#0: # %entry 604; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 605; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 606; SSE2-NEXT: movdqa %xmm1, %xmm0 607; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 608; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 609; SSE2-NEXT: pand %xmm2, %xmm0 610; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 611; SSE2-NEXT: pand %xmm2, %xmm1 612; SSE2-NEXT: retq 613; 614; SSSE3-LABEL: load_zext_8i8_to_8i32: 615; SSSE3: # BB#0: # %entry 616; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 617; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 618; SSSE3-NEXT: movdqa %xmm1, %xmm0 619; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[6],zero,zero,zero 620; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[14],zero,zero,zero 621; SSSE3-NEXT: retq 622; 623; SSE41-LABEL: load_zext_8i8_to_8i32: 624; SSE41: # BB#0: # %entry 625; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 626; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 627; SSE41-NEXT: retq 628; 629; AVX1-LABEL: load_zext_8i8_to_8i32: 630; AVX1: # BB#0: # %entry 631; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 632; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 633; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 634; AVX1-NEXT: retq 635; 636; AVX2-LABEL: load_zext_8i8_to_8i32: 637; AVX2: # BB#0: # %entry 638; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 639; AVX2-NEXT: retq 640entry: 641 %X = load <8 x i8>, <8 x i8>* %ptr 642 %Y = zext <8 x i8> %X to <8 x i32> 643 ret <8 x i32> %Y 644} 645 646define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) { 647; SSE2-LABEL: load_zext_16i8_to_16i16: 648; SSE2: # BB#0: # %entry 649; 
SSE2-NEXT: movdqa (%rdi), %xmm1 650; SSE2-NEXT: pxor %xmm2, %xmm2 651; SSE2-NEXT: movdqa %xmm1, %xmm0 652; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 653; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 654; SSE2-NEXT: retq 655; 656; SSSE3-LABEL: load_zext_16i8_to_16i16: 657; SSSE3: # BB#0: # %entry 658; SSSE3-NEXT: movdqa (%rdi), %xmm1 659; SSSE3-NEXT: pxor %xmm2, %xmm2 660; SSSE3-NEXT: movdqa %xmm1, %xmm0 661; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 662; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 663; SSSE3-NEXT: retq 664; 665; SSE41-LABEL: load_zext_16i8_to_16i16: 666; SSE41: # BB#0: # %entry 667; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 668; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 669; SSE41-NEXT: retq 670; 671; AVX1-LABEL: load_zext_16i8_to_16i16: 672; AVX1: # BB#0: # %entry 673; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 674; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 675; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 676; AVX1-NEXT: retq 677; 678; AVX2-LABEL: load_zext_16i8_to_16i16: 679; AVX2: # BB#0: # %entry 680; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 681; AVX2-NEXT: retq 682entry: 683 %X = load <16 x i8>, <16 x i8>* %ptr 684 %Y = zext <16 x i8> %X to <16 x i16> 685 ret <16 x i16> %Y 686} 687 688define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) { 689; SSE2-LABEL: load_zext_2i16_to_2i64: 690; SSE2: # BB#0: # %entry 691; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 692; SSE2-NEXT: pxor %xmm1, %xmm1 693; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 694; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 695; SSE2-NEXT: retq 696; 697; SSSE3-LABEL: load_zext_2i16_to_2i64: 698; SSSE3: # BB#0: # %entry 699; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 700; SSSE3-NEXT: pxor %xmm1, %xmm1 701; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 702; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 703; SSSE3-NEXT: retq 704; 705; SSE41-LABEL: load_zext_2i16_to_2i64: 706; SSE41: # BB#0: # %entry 707; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 708; SSE41-NEXT: retq 709; 710; AVX-LABEL: load_zext_2i16_to_2i64: 711; AVX: # BB#0: # %entry 712; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 713; AVX-NEXT: retq 714entry: 715 %X = load <2 x i16>, <2 x i16>* %ptr 716 %Y = zext <2 x i16> %X to <2 x i64> 717 
ret <2 x i64> %Y 718} 719 720define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) { 721; SSE2-LABEL: load_zext_4i16_to_4i32: 722; SSE2: # BB#0: # %entry 723; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 724; SSE2-NEXT: pxor %xmm1, %xmm1 725; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 726; SSE2-NEXT: retq 727; 728; SSSE3-LABEL: load_zext_4i16_to_4i32: 729; SSSE3: # BB#0: # %entry 730; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 731; SSSE3-NEXT: pxor %xmm1, %xmm1 732; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 733; SSSE3-NEXT: retq 734; 735; SSE41-LABEL: load_zext_4i16_to_4i32: 736; SSE41: # BB#0: # %entry 737; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 738; SSE41-NEXT: retq 739; 740; AVX-LABEL: load_zext_4i16_to_4i32: 741; AVX: # BB#0: # %entry 742; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 743; AVX-NEXT: retq 744entry: 745 %X = load <4 x i16>, <4 x i16>* %ptr 746 %Y = zext <4 x i16> %X to <4 x i32> 747 ret <4 x i32> %Y 748} 749 750define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) { 751; SSE2-LABEL: load_zext_4i16_to_4i64: 752; SSE2: # BB#0: # %entry 753; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 754; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 755; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 756; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0,65535,0,0,0] 757; SSE2-NEXT: pand %xmm2, %xmm0 758; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 759; SSE2-NEXT: pand %xmm2, %xmm1 760; SSE2-NEXT: retq 761; 762; SSSE3-LABEL: load_zext_4i16_to_4i64: 763; SSSE3: # BB#0: # %entry 764; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 765; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 766; SSSE3-NEXT: movdqa %xmm1, %xmm0 767; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero 768; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9],zero,zero,zero,zero,zero,zero,xmm1[12,13],zero,zero,zero,zero,zero,zero 769; SSSE3-NEXT: retq 770; 771; SSE41-LABEL: load_zext_4i16_to_4i64: 772; SSE41: # BB#0: # %entry 773; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 774; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 775; SSE41-NEXT: retq 776; 777; AVX1-LABEL: load_zext_4i16_to_4i64: 778; AVX1: # BB#0: # %entry 779; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 780; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 781; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 782; AVX1-NEXT: retq 783; 784; AVX2-LABEL: load_zext_4i16_to_4i64: 785; AVX2: # BB#0: # %entry 786; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 787; AVX2-NEXT: retq 788entry: 789 %X = load <4 x i16>, <4 x i16>* %ptr 790 %Y = zext <4 x i16> %X to <4 x i64> 791 ret <4 x i64> %Y 792} 793 794define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) { 795; SSE2-LABEL: load_zext_8i16_to_8i32: 796; SSE2: # BB#0: # %entry 797; SSE2-NEXT: movdqa (%rdi), %xmm1 798; SSE2-NEXT: pxor %xmm2, %xmm2 799; SSE2-NEXT: movdqa %xmm1, %xmm0 800; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 801; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = 
xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 802; SSE2-NEXT: retq 803; 804; SSSE3-LABEL: load_zext_8i16_to_8i32: 805; SSSE3: # BB#0: # %entry 806; SSSE3-NEXT: movdqa (%rdi), %xmm1 807; SSSE3-NEXT: pxor %xmm2, %xmm2 808; SSSE3-NEXT: movdqa %xmm1, %xmm0 809; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 810; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 811; SSSE3-NEXT: retq 812; 813; SSE41-LABEL: load_zext_8i16_to_8i32: 814; SSE41: # BB#0: # %entry 815; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 816; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 817; SSE41-NEXT: retq 818; 819; AVX1-LABEL: load_zext_8i16_to_8i32: 820; AVX1: # BB#0: # %entry 821; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 822; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 823; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 824; AVX1-NEXT: retq 825; 826; AVX2-LABEL: load_zext_8i16_to_8i32: 827; AVX2: # BB#0: # %entry 828; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 829; AVX2-NEXT: retq 830entry: 831 %X = load <8 x i16>, <8 x i16>* %ptr 832 %Y = zext <8 x i16> %X to <8 x i32> 833 ret <8 x i32> %Y 834} 835 836define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) { 837; SSE2-LABEL: load_zext_2i32_to_2i64: 838; SSE2: # BB#0: # %entry 839; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 840; SSE2-NEXT: pxor %xmm1, %xmm1 841; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 842; SSE2-NEXT: retq 843; 844; SSSE3-LABEL: load_zext_2i32_to_2i64: 845; SSSE3: # BB#0: # %entry 846; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 847; SSSE3-NEXT: pxor %xmm1, %xmm1 848; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 849; SSSE3-NEXT: retq 850; 851; SSE41-LABEL: load_zext_2i32_to_2i64: 852; SSE41: # BB#0: # %entry 853; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 854; SSE41-NEXT: retq 855; 856; AVX-LABEL: load_zext_2i32_to_2i64: 857; AVX: # BB#0: # %entry 858; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 859; AVX-NEXT: retq 860entry: 861 %X = load <2 x i32>, <2 x i32>* %ptr 862 %Y = zext <2 x i32> %X to <2 x i64> 863 ret <2 x i64> %Y 864} 865 866define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) { 867; SSE2-LABEL: load_zext_4i32_to_4i64: 868; SSE2: # BB#0: # %entry 869; SSE2-NEXT: movdqa (%rdi), %xmm1 870; SSE2-NEXT: pxor %xmm2, %xmm2 871; SSE2-NEXT: movdqa %xmm1, %xmm0 872; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 873; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 874; SSE2-NEXT: retq 875; 876; SSSE3-LABEL: load_zext_4i32_to_4i64: 877; SSSE3: # BB#0: # %entry 878; SSSE3-NEXT: movdqa (%rdi), %xmm1 879; SSSE3-NEXT: pxor %xmm2, %xmm2 880; SSSE3-NEXT: movdqa %xmm1, %xmm0 881; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 882; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 883; SSSE3-NEXT: retq 884; 885; SSE41-LABEL: load_zext_4i32_to_4i64: 886; SSE41: # BB#0: # %entry 887; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 888; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 889; SSE41-NEXT: retq 890; 891; AVX1-LABEL: load_zext_4i32_to_4i64: 892; AVX1: # BB#0: # %entry 893; 
AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 894; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 895; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 896; AVX1-NEXT: retq 897; 898; AVX2-LABEL: load_zext_4i32_to_4i64: 899; AVX2: # BB#0: # %entry 900; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 901; AVX2-NEXT: retq 902entry: 903 %X = load <4 x i32>, <4 x i32>* %ptr 904 %Y = zext <4 x i32> %X to <4 x i64> 905 ret <4 x i64> %Y 906} 907 908define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { 909; SSE2-LABEL: zext_8i8_to_8i32: 910; SSE2: # BB#0: # %entry 911; SSE2-NEXT: movdqa %xmm0, %xmm1 912; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 913; SSE2-NEXT: pxor %xmm2, %xmm2 914; SSE2-NEXT: movdqa %xmm1, %xmm0 915; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 916; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 917; SSE2-NEXT: retq 918; 919; SSSE3-LABEL: zext_8i8_to_8i32: 920; SSSE3: # BB#0: # %entry 921; SSSE3-NEXT: movdqa %xmm0, %xmm1 922; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 923; SSSE3-NEXT: pxor %xmm2, %xmm2 924; SSSE3-NEXT: movdqa %xmm1, %xmm0 925; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 926; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 927; SSSE3-NEXT: retq 928; 929; SSE41-LABEL: zext_8i8_to_8i32: 930; SSE41: # BB#0: # %entry 931; SSE41-NEXT: movdqa %xmm0, %xmm1 932; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 933; SSE41-NEXT: pxor %xmm2, %xmm2 934; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 935; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 936; SSE41-NEXT: retq 937; 938; AVX1-LABEL: zext_8i8_to_8i32: 939; AVX1: # BB#0: # %entry 940; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 941; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 942; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 943; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 944; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 945; AVX1-NEXT: retq 946; 947; AVX2-LABEL: zext_8i8_to_8i32: 948; AVX2: # BB#0: # %entry 949; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 950; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 951; AVX2-NEXT: retq 952entry: 953 %t = zext <8 x i8> %z to <8 x i32> 954 ret <8 x i32> %t 955} 956 957define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 958; SSE2-LABEL: shuf_zext_8i16_to_8i32: 959; SSE2: # BB#0: # %entry 960; SSE2-NEXT: movdqa %xmm0, %xmm1 961; SSE2-NEXT: pxor %xmm2, %xmm2 962; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 963; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 964; SSE2-NEXT: retq 965; 966; SSSE3-LABEL: shuf_zext_8i16_to_8i32: 967; SSSE3: # BB#0: # %entry 968; SSSE3-NEXT: movdqa %xmm0, %xmm1 969; SSSE3-NEXT: pxor %xmm2, %xmm2 970; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 971; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 972; SSSE3-NEXT: retq 973; 974; SSE41-LABEL: shuf_zext_8i16_to_8i32: 
975; SSE41: # BB#0: # %entry 976; SSE41-NEXT: movdqa %xmm0, %xmm1 977; SSE41-NEXT: pxor %xmm2, %xmm2 978; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 979; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 980; SSE41-NEXT: retq 981; 982; AVX1-LABEL: shuf_zext_8i16_to_8i32: 983; AVX1: # BB#0: # %entry 984; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 985; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 986; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 987; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 988; AVX1-NEXT: retq 989; 990; AVX2-LABEL: shuf_zext_8i16_to_8i32: 991; AVX2: # BB#0: # %entry 992; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 993; AVX2-NEXT: retq 994entry: 995 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8> 996 %Z = bitcast <16 x i16> %B to <8 x i32> 997 ret <8 x i32> %Z 998} 999 1000define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1001; SSE2-LABEL: shuf_zext_4i32_to_4i64: 1002; SSE2: # BB#0: # %entry 1003; SSE2-NEXT: movdqa %xmm0, %xmm1 1004; SSE2-NEXT: pxor %xmm2, %xmm2 1005; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1006; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1007; SSE2-NEXT: retq 1008; 1009; SSSE3-LABEL: shuf_zext_4i32_to_4i64: 1010; SSSE3: # BB#0: # %entry 1011; SSSE3-NEXT: movdqa %xmm0, %xmm1 1012; SSSE3-NEXT: pxor %xmm2, %xmm2 1013; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1014; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1015; SSSE3-NEXT: retq 1016; 1017; SSE41-LABEL: shuf_zext_4i32_to_4i64: 1018; SSE41: # BB#0: # %entry 1019; SSE41-NEXT: movdqa %xmm0, %xmm1 1020; SSE41-NEXT: pxor %xmm2, %xmm2 1021; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1022; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1023; SSE41-NEXT: retq 1024; 1025; AVX1-LABEL: shuf_zext_4i32_to_4i64: 1026; AVX1: # BB#0: # %entry 1027; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero 1028; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1029; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1030; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0] 1031; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1032; AVX1-NEXT: retq 1033; 1034; AVX2-LABEL: shuf_zext_4i32_to_4i64: 1035; AVX2: # BB#0: # %entry 1036; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1037; AVX2-NEXT: retq 1038entry: 1039 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4> 1040 %Z = bitcast <8 x i32> %B to <4 x i64> 1041 ret <4 x i64> %Z 1042} 1043 1044define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) { 1045; SSE2-LABEL: shuf_zext_8i8_to_8i32: 1046; SSE2: # BB#0: # %entry 1047; SSE2-NEXT: movdqa %xmm0, %xmm1 1048; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1049; SSE2-NEXT: packuswb %xmm1, %xmm1 1050; SSE2-NEXT: pxor %xmm2, %xmm2 1051; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1052; SSE2-NEXT: movdqa 
%xmm1, %xmm0 1053; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1054; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1055; SSE2-NEXT: retq 1056; 1057; SSSE3-LABEL: shuf_zext_8i8_to_8i32: 1058; SSSE3: # BB#0: # %entry 1059; SSSE3-NEXT: movdqa %xmm0, %xmm1 1060; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1061; SSSE3-NEXT: pxor %xmm2, %xmm2 1062; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1063; SSSE3-NEXT: movdqa %xmm1, %xmm0 1064; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1065; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1066; SSSE3-NEXT: retq 1067; 1068; SSE41-LABEL: shuf_zext_8i8_to_8i32: 1069; SSE41: # BB#0: # %entry 1070; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1071; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1072; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1073; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1074; SSE41-NEXT: movdqa %xmm2, %xmm0 1075; SSE41-NEXT: retq 1076; 1077; AVX1-LABEL: shuf_zext_8i8_to_8i32: 1078; AVX1: # BB#0: # %entry 1079; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1080; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1081; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1082; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1083; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1084; AVX1-NEXT: retq 1085; 1086; AVX2-LABEL: shuf_zext_8i8_to_8i32: 1087; AVX2: # BB#0: # %entry 1088; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1089; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1090; AVX2-NEXT: retq 1091entry: 1092 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8> 1093 %Z = bitcast <32 x i8> %B to <8 x i32> 1094 ret <8 x i32> %Z 1095} 1096 1097define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp { 1098; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6: 1099; SSE2: # BB#0: # %entry 1100; SSE2-NEXT: pxor %xmm1, %xmm1 1101; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1102; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1103; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1104; SSE2-NEXT: retq 1105; 1106; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6: 1107; SSSE3: # BB#0: # %entry 1108; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrlq $48, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %Z = bitcast <16 x i8> %B to <2 x i64>
  ret <2 x i64> %Z
}

define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %Z = bitcast <32 x i8> %B to <4 x i64>
  ret <4 x i64> %Z
}

define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
  %Z = bitcast <8 x i16> %B to <2 x i64>
  ret <2 x i64> %Z
}

define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,2,3,5,6,6,7]
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
  %Z = bitcast <16 x i16> %B to <4 x i64>
  ret <4 x i64> %Z
}

define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE: # BB#0: # %entry
; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
  %Z = bitcast <8 x i16> %B to <4 x i32>
  ret <4 x i32> %Z
}

define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}

define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}

define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
; SSE: # BB#0: # %entry
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <4 x i32> %B to <2 x i64>
  ret <2 x i64> %Z
}

define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,3,5,6,7,7]
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}