1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 7; 8; Just one 32-bit run to make sure we do reasonable things there. 9; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41 10 11define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 12; SSE2-LABEL: sext_16i8_to_8i16: 13; SSE2: # BB#0: # %entry 14; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 15; SSE2-NEXT: psraw $8, %xmm0 16; SSE2-NEXT: retq 17; 18; SSSE3-LABEL: sext_16i8_to_8i16: 19; SSSE3: # BB#0: # %entry 20; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 21; SSSE3-NEXT: psraw $8, %xmm0 22; SSSE3-NEXT: retq 23; 24; SSE41-LABEL: sext_16i8_to_8i16: 25; SSE41: # BB#0: # %entry 26; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 27; SSE41-NEXT: retq 28; 29; AVX-LABEL: sext_16i8_to_8i16: 30; AVX: # BB#0: # %entry 31; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 32; AVX-NEXT: retq 33; 34; X32-SSE41-LABEL: sext_16i8_to_8i16: 35; X32-SSE41: # BB#0: # %entry 36; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 37; X32-SSE41-NEXT: retl 38entry: 39 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 40 %C = sext <8 x i8> %B to <8 x i16> 41 ret <8 x i16> %C 42} 43 44define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp { 45; SSE2-LABEL: sext_16i8_to_16i16: 46; SSE2: # BB#0: # %entry 47; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 48; SSE2-NEXT: psraw $8, %xmm2 49; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 50; SSE2-NEXT: psraw $8, %xmm1 51; SSE2-NEXT: movdqa %xmm2, %xmm0 52; SSE2-NEXT: retq 53; 54; SSSE3-LABEL: sext_16i8_to_16i16: 55; SSSE3: # BB#0: # %entry 56; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 57; SSSE3-NEXT: psraw $8, %xmm2 58; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 59; SSSE3-NEXT: psraw $8, %xmm1 60; SSSE3-NEXT: movdqa %xmm2, %xmm0 61; SSSE3-NEXT: retq 62; 63; SSE41-LABEL: sext_16i8_to_16i16: 64; SSE41: # BB#0: # %entry 65; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 66; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 67; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 68; SSE41-NEXT: movdqa %xmm2, %xmm0 69; SSE41-NEXT: retq 70; 71; AVX1-LABEL: sext_16i8_to_16i16: 72; AVX1: # BB#0: # %entry 73; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 74; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 75; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 76; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, 
%ymm0 77; AVX1-NEXT: retq 78; 79; AVX2-LABEL: sext_16i8_to_16i16: 80; AVX2: # BB#0: # %entry 81; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 82; AVX2-NEXT: retq 83; 84; X32-SSE41-LABEL: sext_16i8_to_16i16: 85; X32-SSE41: # BB#0: # %entry 86; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 87; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 88; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 89; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 90; X32-SSE41-NEXT: retl 91entry: 92 %B = sext <16 x i8> %A to <16 x i16> 93 ret <16 x i16> %B 94} 95 96define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 97; SSE2-LABEL: sext_16i8_to_4i32: 98; SSE2: # BB#0: # %entry 99; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 100; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 101; SSE2-NEXT: psrad $24, %xmm0 102; SSE2-NEXT: retq 103; 104; SSSE3-LABEL: sext_16i8_to_4i32: 105; SSSE3: # BB#0: # %entry 106; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 107; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 108; SSSE3-NEXT: psrad $24, %xmm0 109; SSSE3-NEXT: retq 110; 111; SSE41-LABEL: sext_16i8_to_4i32: 112; SSE41: # BB#0: # %entry 113; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 114; SSE41-NEXT: retq 115; 116; AVX-LABEL: sext_16i8_to_4i32: 117; AVX: # BB#0: # %entry 118; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 119; AVX-NEXT: retq 120; 121; X32-SSE41-LABEL: sext_16i8_to_4i32: 122; X32-SSE41: # BB#0: # %entry 123; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 124; X32-SSE41-NEXT: retl 125entry: 126 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 127 %C = sext <4 x i8> %B to <4 x i32> 128 ret <4 x i32> %C 129} 130 131define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 132; SSE2-LABEL: sext_16i8_to_8i32: 133; SSE2: # BB#0: # %entry 134; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 135; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 136; SSE2-NEXT: psrad $24, %xmm2 137; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 138; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 139; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 140; SSE2-NEXT: psrad $24, %xmm1 141; SSE2-NEXT: movdqa %xmm2, %xmm0 142; SSE2-NEXT: retq 143; 144; SSSE3-LABEL: sext_16i8_to_8i32: 145; SSSE3: # BB#0: # %entry 146; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 147; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 148; SSSE3-NEXT: psrad $24, %xmm2 149; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 150; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 151; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 152; SSSE3-NEXT: psrad $24, %xmm1 153; SSSE3-NEXT: movdqa %xmm2, %xmm0 154; SSSE3-NEXT: retq 155; 156; SSE41-LABEL: sext_16i8_to_8i32: 157; SSE41: # BB#0: # %entry 158; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 159; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 160; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 161; SSE41-NEXT: movdqa %xmm2, %xmm0 162; SSE41-NEXT: retq 163; 164; AVX1-LABEL: sext_16i8_to_8i32: 165; AVX1: # BB#0: # %entry 166; 
AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 167; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 168; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 169; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 170; AVX1-NEXT: retq 171; 172; AVX2-LABEL: sext_16i8_to_8i32: 173; AVX2: # BB#0: # %entry 174; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 175; AVX2-NEXT: vpslld $24, %ymm0, %ymm0 176; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 177; AVX2-NEXT: retq 178; 179; X32-SSE41-LABEL: sext_16i8_to_8i32: 180; X32-SSE41: # BB#0: # %entry 181; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 182; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 183; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 184; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 185; X32-SSE41-NEXT: retl 186entry: 187 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 188 %C = sext <8 x i8> %B to <8 x i32> 189 ret <8 x i32> %C 190} 191 192define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 193; SSE2-LABEL: sext_16i8_to_2i64: 194; SSE2: # BB#0: # %entry 195; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 196; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 197; SSE2-NEXT: movdqa %xmm0, %xmm1 198; SSE2-NEXT: psrad $31, %xmm1 199; SSE2-NEXT: psrad $24, %xmm0 200; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 201; SSE2-NEXT: retq 202; 203; SSSE3-LABEL: sext_16i8_to_2i64: 204; SSSE3: # BB#0: # %entry 205; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 206; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 207; SSSE3-NEXT: movdqa %xmm0, %xmm1 208; SSSE3-NEXT: psrad $31, %xmm1 209; SSSE3-NEXT: psrad $24, %xmm0 210; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 211; SSSE3-NEXT: retq 212; 213; SSE41-LABEL: sext_16i8_to_2i64: 214; SSE41: # BB#0: # %entry 215; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 216; SSE41-NEXT: retq 217; 218; AVX-LABEL: sext_16i8_to_2i64: 219; AVX: # BB#0: # %entry 220; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 221; AVX-NEXT: retq 222; 223; X32-SSE41-LABEL: sext_16i8_to_2i64: 224; X32-SSE41: # BB#0: # %entry 225; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 226; X32-SSE41-NEXT: retl 227entry: 228 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 229 %C = sext <2 x i8> %B to <2 x i64> 230 ret <2 x i64> %C 231} 232 233define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 234; SSE2-LABEL: sext_16i8_to_4i64: 235; SSE2: # BB#0: # %entry 236; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 237; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 238; SSE2-NEXT: movdqa %xmm2, %xmm1 239; SSE2-NEXT: psrad $31, %xmm1 240; SSE2-NEXT: psrad $24, %xmm2 241; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 242; SSE2-NEXT: psrld $16, %xmm0 243; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 244; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 245; SSE2-NEXT: movdqa %xmm1, %xmm0 246; SSE2-NEXT: psrad $31, %xmm0 247; SSE2-NEXT: psrad $24, %xmm1 248; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] 249; SSE2-NEXT: movdqa %xmm2, %xmm0 250; SSE2-NEXT: retq 251; 252; SSSE3-LABEL: sext_16i8_to_4i64: 253; SSSE3: # BB#0: # %entry 254; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 255; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 256; SSSE3-NEXT: movdqa %xmm2, %xmm1 257; SSSE3-NEXT: psrad $31, %xmm1 258; SSSE3-NEXT: psrad $24, %xmm2 259; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 260; SSSE3-NEXT: psrld $16, %xmm0 261; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 262; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 263; SSSE3-NEXT: movdqa %xmm1, %xmm0 264; SSSE3-NEXT: psrad $31, %xmm0 265; SSSE3-NEXT: psrad $24, %xmm1 266; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 267; SSSE3-NEXT: movdqa %xmm2, %xmm0 268; SSSE3-NEXT: retq 269; 270; SSE41-LABEL: sext_16i8_to_4i64: 271; SSE41: # BB#0: # %entry 272; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 273; SSE41-NEXT: psrld $16, %xmm0 274; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 275; SSE41-NEXT: movdqa %xmm2, %xmm0 276; SSE41-NEXT: retq 277; 278; AVX1-LABEL: sext_16i8_to_4i64: 279; AVX1: # BB#0: # %entry 280; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 281; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 282; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 283; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 284; AVX1-NEXT: retq 285; 286; AVX2-LABEL: sext_16i8_to_4i64: 287; AVX2: # BB#0: # %entry 288; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 289; AVX2-NEXT: vpslld $24, %xmm0, %xmm0 290; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0 291; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 292; AVX2-NEXT: retq 293; 294; X32-SSE41-LABEL: sext_16i8_to_4i64: 295; X32-SSE41: # BB#0: # %entry 296; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 297; X32-SSE41-NEXT: psrld $16, %xmm0 298; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 299; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 300; X32-SSE41-NEXT: retl 301entry: 302 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 303 %C = sext <4 x i8> %B to <4 x i64> 304 ret <4 x i64> %C 305} 306 307define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 308; SSE2-LABEL: sext_8i16_to_4i32: 309; SSE2: # BB#0: # %entry 310; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 311; SSE2-NEXT: psrad $16, %xmm0 312; SSE2-NEXT: retq 313; 314; SSSE3-LABEL: sext_8i16_to_4i32: 315; SSSE3: # BB#0: # %entry 316; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 317; SSSE3-NEXT: psrad $16, %xmm0 318; SSSE3-NEXT: retq 319; 320; SSE41-LABEL: sext_8i16_to_4i32: 321; SSE41: # BB#0: # %entry 322; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 323; SSE41-NEXT: retq 324; 325; AVX-LABEL: sext_8i16_to_4i32: 326; AVX: # BB#0: # %entry 327; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 328; AVX-NEXT: retq 329; 330; X32-SSE41-LABEL: sext_8i16_to_4i32: 331; X32-SSE41: # BB#0: # %entry 332; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 333; X32-SSE41-NEXT: retl 334entry: 335 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 336 %C = sext <4 x i16> %B to <4 x i32> 337 ret <4 x i32> %C 338} 339 340define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 341; SSE2-LABEL: sext_8i16_to_8i32: 342; SSE2: # BB#0: # %entry 
343; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 344; SSE2-NEXT: psrad $16, %xmm2 345; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 346; SSE2-NEXT: psrad $16, %xmm1 347; SSE2-NEXT: movdqa %xmm2, %xmm0 348; SSE2-NEXT: retq 349; 350; SSSE3-LABEL: sext_8i16_to_8i32: 351; SSSE3: # BB#0: # %entry 352; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 353; SSSE3-NEXT: psrad $16, %xmm2 354; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 355; SSSE3-NEXT: psrad $16, %xmm1 356; SSSE3-NEXT: movdqa %xmm2, %xmm0 357; SSSE3-NEXT: retq 358; 359; SSE41-LABEL: sext_8i16_to_8i32: 360; SSE41: # BB#0: # %entry 361; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 362; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 363; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 364; SSE41-NEXT: movdqa %xmm2, %xmm0 365; SSE41-NEXT: retq 366; 367; AVX1-LABEL: sext_8i16_to_8i32: 368; AVX1: # BB#0: # %entry 369; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 370; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 371; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 372; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 373; AVX1-NEXT: retq 374; 375; AVX2-LABEL: sext_8i16_to_8i32: 376; AVX2: # BB#0: # %entry 377; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 378; AVX2-NEXT: retq 379; 380; X32-SSE41-LABEL: sext_8i16_to_8i32: 381; X32-SSE41: # BB#0: # %entry 382; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2 383; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 384; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm1 385; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 386; X32-SSE41-NEXT: retl 387entry: 388 %B = sext <8 x i16> %A to <8 x i32> 389 ret <8 x i32> %B 390} 391 392define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 393; SSE2-LABEL: sext_8i16_to_2i64: 394; SSE2: # BB#0: # %entry 395; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 396; SSE2-NEXT: movdqa %xmm0, %xmm1 397; SSE2-NEXT: psrad $31, %xmm1 398; SSE2-NEXT: psrad $16, %xmm0 399; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 400; SSE2-NEXT: retq 401; 402; SSSE3-LABEL: sext_8i16_to_2i64: 403; SSSE3: # BB#0: # %entry 404; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 405; SSSE3-NEXT: movdqa %xmm0, %xmm1 406; SSSE3-NEXT: psrad $31, %xmm1 407; SSSE3-NEXT: psrad $16, %xmm0 408; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 409; SSSE3-NEXT: retq 410; 411; SSE41-LABEL: sext_8i16_to_2i64: 412; SSE41: # BB#0: # %entry 413; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 414; SSE41-NEXT: retq 415; 416; AVX-LABEL: sext_8i16_to_2i64: 417; AVX: # BB#0: # %entry 418; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 419; AVX-NEXT: retq 420; 421; X32-SSE41-LABEL: sext_8i16_to_2i64: 422; X32-SSE41: # BB#0: # %entry 423; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 424; X32-SSE41-NEXT: retl 425entry: 426 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 427 %C = sext <2 x i16> %B to <2 x i64> 428 ret <2 x i64> %C 429} 430 431define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 432; SSE2-LABEL: sext_8i16_to_4i64: 433; SSE2: # BB#0: # %entry 434; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 435; SSE2-NEXT: movdqa %xmm2, %xmm1 436; SSE2-NEXT: psrad $31, %xmm1 437; SSE2-NEXT: psrad $16, %xmm2 438; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 439; SSE2-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[1,1,2,3] 440; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 441; SSE2-NEXT: movdqa %xmm1, %xmm0 442; SSE2-NEXT: psrad $31, %xmm0 443; SSE2-NEXT: psrad $16, %xmm1 444; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 445; SSE2-NEXT: movdqa %xmm2, %xmm0 446; SSE2-NEXT: retq 447; 448; SSSE3-LABEL: sext_8i16_to_4i64: 449; SSSE3: # BB#0: # %entry 450; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 451; SSSE3-NEXT: movdqa %xmm2, %xmm1 452; SSSE3-NEXT: psrad $31, %xmm1 453; SSSE3-NEXT: psrad $16, %xmm2 454; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 455; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 456; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 457; SSSE3-NEXT: movdqa %xmm1, %xmm0 458; SSSE3-NEXT: psrad $31, %xmm0 459; SSSE3-NEXT: psrad $16, %xmm1 460; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 461; SSSE3-NEXT: movdqa %xmm2, %xmm0 462; SSSE3-NEXT: retq 463; 464; SSE41-LABEL: sext_8i16_to_4i64: 465; SSE41: # BB#0: # %entry 466; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 467; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 468; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 469; SSE41-NEXT: movdqa %xmm2, %xmm0 470; SSE41-NEXT: retq 471; 472; AVX1-LABEL: sext_8i16_to_4i64: 473; AVX1: # BB#0: # %entry 474; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 475; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 476; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 477; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 478; AVX1-NEXT: retq 479; 480; AVX2-LABEL: sext_8i16_to_4i64: 481; AVX2: # BB#0: # %entry 482; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 483; AVX2-NEXT: vpslld $16, %xmm0, %xmm0 484; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0 485; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 486; AVX2-NEXT: retq 487; 488; X32-SSE41-LABEL: sext_8i16_to_4i64: 489; X32-SSE41: # BB#0: # %entry 490; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 491; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 492; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 493; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 494; X32-SSE41-NEXT: retl 495entry: 496 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 497 %C = sext <4 x i16> %B to <4 x i64> 498 ret <4 x i64> %C 499} 500 501define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 502; SSE2-LABEL: sext_4i32_to_2i64: 503; SSE2: # BB#0: # %entry 504; SSE2-NEXT: movdqa %xmm0, %xmm1 505; SSE2-NEXT: psrad $31, %xmm1 506; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 507; SSE2-NEXT: retq 508; 509; SSSE3-LABEL: sext_4i32_to_2i64: 510; SSSE3: # BB#0: # %entry 511; SSSE3-NEXT: movdqa %xmm0, %xmm1 512; SSSE3-NEXT: psrad $31, %xmm1 513; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 514; SSSE3-NEXT: retq 515; 516; SSE41-LABEL: sext_4i32_to_2i64: 517; SSE41: # BB#0: # %entry 518; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 519; SSE41-NEXT: retq 520; 521; AVX-LABEL: sext_4i32_to_2i64: 522; AVX: # BB#0: # %entry 523; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 524; AVX-NEXT: retq 525; 526; X32-SSE41-LABEL: sext_4i32_to_2i64: 527; X32-SSE41: # BB#0: # %entry 528; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 529; X32-SSE41-NEXT: retl 530entry: 531 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 532 %C = sext <2 x i32> %B to <2 x i64> 533 ret <2 x i64> %C 534} 535 
536define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 537; SSE2-LABEL: sext_4i32_to_4i64: 538; SSE2: # BB#0: # %entry 539; SSE2-NEXT: movdqa %xmm0, %xmm2 540; SSE2-NEXT: psrad $31, %xmm2 541; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 542; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 543; SSE2-NEXT: movdqa %xmm1, %xmm2 544; SSE2-NEXT: psrad $31, %xmm2 545; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 546; SSE2-NEXT: retq 547; 548; SSSE3-LABEL: sext_4i32_to_4i64: 549; SSSE3: # BB#0: # %entry 550; SSSE3-NEXT: movdqa %xmm0, %xmm2 551; SSSE3-NEXT: psrad $31, %xmm2 552; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 553; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 554; SSSE3-NEXT: movdqa %xmm1, %xmm2 555; SSSE3-NEXT: psrad $31, %xmm2 556; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 557; SSSE3-NEXT: retq 558; 559; SSE41-LABEL: sext_4i32_to_4i64: 560; SSE41: # BB#0: # %entry 561; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 562; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 563; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 564; SSE41-NEXT: movdqa %xmm2, %xmm0 565; SSE41-NEXT: retq 566; 567; AVX1-LABEL: sext_4i32_to_4i64: 568; AVX1: # BB#0: # %entry 569; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 570; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 571; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 572; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 573; AVX1-NEXT: retq 574; 575; AVX2-LABEL: sext_4i32_to_4i64: 576; AVX2: # BB#0: # %entry 577; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 578; AVX2-NEXT: retq 579; 580; X32-SSE41-LABEL: sext_4i32_to_4i64: 581; X32-SSE41: # BB#0: # %entry 582; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 583; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 584; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 585; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 586; X32-SSE41-NEXT: retl 587entry: 588 %B = sext <4 x i32> %A to <4 x i64> 589 ret <4 x i64> %B 590} 591 592define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { 593; SSE-LABEL: load_sext_2i1_to_2i64: 594; SSE: # BB#0: # %entry 595; SSE-NEXT: movzbl (%rdi), %eax 596; SSE-NEXT: movq %rax, %rcx 597; SSE-NEXT: shlq $62, %rcx 598; SSE-NEXT: sarq $63, %rcx 599; SSE-NEXT: movd %rcx, %xmm1 600; SSE-NEXT: shlq $63, %rax 601; SSE-NEXT: sarq $63, %rax 602; SSE-NEXT: movd %rax, %xmm0 603; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 604; SSE-NEXT: retq 605; 606; AVX-LABEL: load_sext_2i1_to_2i64: 607; AVX: # BB#0: # %entry 608; AVX-NEXT: movzbl (%rdi), %eax 609; AVX-NEXT: movq %rax, %rcx 610; AVX-NEXT: shlq $62, %rcx 611; AVX-NEXT: sarq $63, %rcx 612; AVX-NEXT: vmovq %rcx, %xmm0 613; AVX-NEXT: shlq $63, %rax 614; AVX-NEXT: sarq $63, %rax 615; AVX-NEXT: vmovq %rax, %xmm1 616; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 617; AVX-NEXT: retq 618; 619; X32-SSE41-LABEL: load_sext_2i1_to_2i64: 620; X32-SSE41: # BB#0: # %entry 621; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 622; X32-SSE41-NEXT: movzbl (%eax), %eax 623; X32-SSE41-NEXT: movl %eax, %ecx 624; X32-SSE41-NEXT: shll $31, %ecx 625; X32-SSE41-NEXT: sarl $31, %ecx 626; X32-SSE41-NEXT: movd %ecx, %xmm0 627; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 628; X32-SSE41-NEXT: shll $30, %eax 629; X32-SSE41-NEXT: sarl $31, %eax 630; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0 631; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 632; X32-SSE41-NEXT: retl 633entry: 634 %X = load <2 x i1>, <2 x i1>* %ptr 635 %Y = sext <2 x i1> %X to <2 x i64> 636 ret <2 x i64> %Y 637} 638 639define <2 x i64> @load_sext_2i8_to_2i64(<2 
x i8> *%ptr) { 640; SSE2-LABEL: load_sext_2i8_to_2i64: 641; SSE2: # BB#0: # %entry 642; SSE2-NEXT: movzwl (%rdi), %eax 643; SSE2-NEXT: movd %eax, %xmm0 644; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 645; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 646; SSE2-NEXT: movdqa %xmm0, %xmm1 647; SSE2-NEXT: psrad $31, %xmm1 648; SSE2-NEXT: psrad $24, %xmm0 649; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 650; SSE2-NEXT: retq 651; 652; SSSE3-LABEL: load_sext_2i8_to_2i64: 653; SSSE3: # BB#0: # %entry 654; SSSE3-NEXT: movzwl (%rdi), %eax 655; SSSE3-NEXT: movd %eax, %xmm0 656; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 657; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 658; SSSE3-NEXT: movdqa %xmm0, %xmm1 659; SSSE3-NEXT: psrad $31, %xmm1 660; SSSE3-NEXT: psrad $24, %xmm0 661; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 662; SSSE3-NEXT: retq 663; 664; SSE41-LABEL: load_sext_2i8_to_2i64: 665; SSE41: # BB#0: # %entry 666; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 667; SSE41-NEXT: retq 668; 669; AVX-LABEL: load_sext_2i8_to_2i64: 670; AVX: # BB#0: # %entry 671; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 672; AVX-NEXT: retq 673; 674; X32-SSE41-LABEL: load_sext_2i8_to_2i64: 675; X32-SSE41: # BB#0: # %entry 676; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 677; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 678; X32-SSE41-NEXT: retl 679entry: 680 %X = load <2 x i8>, <2 x i8>* %ptr 681 %Y = sext <2 x i8> %X to <2 x i64> 682 ret <2 x i64> %Y 683} 684 685define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { 686; SSE2-LABEL: load_sext_4i1_to_4i32: 687; SSE2: # BB#0: # %entry 688; SSE2-NEXT: movzbl (%rdi), %eax 689; SSE2-NEXT: movq %rax, %rcx 690; SSE2-NEXT: shlq $60, %rcx 691; SSE2-NEXT: sarq $63, %rcx 692; SSE2-NEXT: movd %ecx, %xmm0 693; SSE2-NEXT: movq %rax, %rcx 694; SSE2-NEXT: shlq $62, %rcx 695; SSE2-NEXT: sarq $63, %rcx 696; SSE2-NEXT: movd %ecx, %xmm1 697; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 698; SSE2-NEXT: movq %rax, %rcx 699; SSE2-NEXT: shlq $61, %rcx 700; SSE2-NEXT: sarq $63, %rcx 701; SSE2-NEXT: movd %ecx, %xmm2 702; SSE2-NEXT: shlq $63, %rax 703; SSE2-NEXT: sarq $63, %rax 704; SSE2-NEXT: movd %eax, %xmm0 705; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 706; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 707; SSE2-NEXT: retq 708; 709; SSSE3-LABEL: load_sext_4i1_to_4i32: 710; SSSE3: # BB#0: # %entry 711; SSSE3-NEXT: movzbl (%rdi), %eax 712; SSSE3-NEXT: movq %rax, %rcx 713; SSSE3-NEXT: shlq $60, %rcx 714; SSSE3-NEXT: sarq $63, %rcx 715; SSSE3-NEXT: movd %ecx, %xmm0 716; SSSE3-NEXT: movq %rax, %rcx 717; SSSE3-NEXT: shlq $62, %rcx 718; SSSE3-NEXT: sarq $63, %rcx 719; SSSE3-NEXT: movd %ecx, %xmm1 720; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 721; SSSE3-NEXT: movq %rax, %rcx 722; SSSE3-NEXT: shlq $61, %rcx 723; SSSE3-NEXT: sarq $63, %rcx 724; SSSE3-NEXT: movd %ecx, %xmm2 725; SSSE3-NEXT: shlq $63, %rax 726; SSSE3-NEXT: sarq $63, %rax 727; SSSE3-NEXT: movd %eax, %xmm0 728; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 729; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 730; SSSE3-NEXT: retq 731; 732; SSE41-LABEL: load_sext_4i1_to_4i32: 733; SSE41: # BB#0: # %entry 734; SSE41-NEXT: movzbl (%rdi), %eax 735; SSE41-NEXT: movq %rax, %rcx 736; SSE41-NEXT: shlq $62, %rcx 737; SSE41-NEXT: sarq $63, %rcx 738; SSE41-NEXT: movq %rax, %rdx 739; 
SSE41-NEXT: shlq $63, %rdx 740; SSE41-NEXT: sarq $63, %rdx 741; SSE41-NEXT: movd %edx, %xmm0 742; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 743; SSE41-NEXT: movq %rax, %rcx 744; SSE41-NEXT: shlq $61, %rcx 745; SSE41-NEXT: sarq $63, %rcx 746; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 747; SSE41-NEXT: shlq $60, %rax 748; SSE41-NEXT: sarq $63, %rax 749; SSE41-NEXT: pinsrd $3, %eax, %xmm0 750; SSE41-NEXT: retq 751; 752; AVX-LABEL: load_sext_4i1_to_4i32: 753; AVX: # BB#0: # %entry 754; AVX-NEXT: movzbl (%rdi), %eax 755; AVX-NEXT: movq %rax, %rcx 756; AVX-NEXT: shlq $62, %rcx 757; AVX-NEXT: sarq $63, %rcx 758; AVX-NEXT: movq %rax, %rdx 759; AVX-NEXT: shlq $63, %rdx 760; AVX-NEXT: sarq $63, %rdx 761; AVX-NEXT: vmovd %edx, %xmm0 762; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 763; AVX-NEXT: movq %rax, %rcx 764; AVX-NEXT: shlq $61, %rcx 765; AVX-NEXT: sarq $63, %rcx 766; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 767; AVX-NEXT: shlq $60, %rax 768; AVX-NEXT: sarq $63, %rax 769; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 770; AVX-NEXT: retq 771; 772; X32-SSE41-LABEL: load_sext_4i1_to_4i32: 773; X32-SSE41: # BB#0: # %entry 774; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 775; X32-SSE41-NEXT: movl (%eax), %eax 776; X32-SSE41-NEXT: movl %eax, %ecx 777; X32-SSE41-NEXT: shll $30, %ecx 778; X32-SSE41-NEXT: sarl $31, %ecx 779; X32-SSE41-NEXT: movl %eax, %edx 780; X32-SSE41-NEXT: shll $31, %edx 781; X32-SSE41-NEXT: sarl $31, %edx 782; X32-SSE41-NEXT: movd %edx, %xmm0 783; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 784; X32-SSE41-NEXT: movl %eax, %ecx 785; X32-SSE41-NEXT: shll $29, %ecx 786; X32-SSE41-NEXT: sarl $31, %ecx 787; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 788; X32-SSE41-NEXT: shll $28, %eax 789; X32-SSE41-NEXT: sarl $31, %eax 790; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 791; X32-SSE41-NEXT: retl 792entry: 793 %X = load <4 x i1>, <4 x i1>* %ptr 794 %Y = sext <4 x i1> %X to <4 x i32> 795 ret <4 x i32> %Y 796} 797 798define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { 799; SSE2-LABEL: load_sext_4i8_to_4i32: 800; SSE2: # BB#0: # %entry 801; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 802; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 803; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 804; SSE2-NEXT: psrad $24, %xmm0 805; SSE2-NEXT: retq 806; 807; SSSE3-LABEL: load_sext_4i8_to_4i32: 808; SSSE3: # BB#0: # %entry 809; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 810; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 811; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 812; SSSE3-NEXT: psrad $24, %xmm0 813; SSSE3-NEXT: retq 814; 815; SSE41-LABEL: load_sext_4i8_to_4i32: 816; SSE41: # BB#0: # %entry 817; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 818; SSE41-NEXT: retq 819; 820; AVX-LABEL: load_sext_4i8_to_4i32: 821; AVX: # BB#0: # %entry 822; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 823; AVX-NEXT: retq 824; 825; X32-SSE41-LABEL: load_sext_4i8_to_4i32: 826; X32-SSE41: # BB#0: # %entry 827; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 828; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 829; X32-SSE41-NEXT: retl 830entry: 831 %X = load <4 x i8>, <4 x i8>* %ptr 832 %Y = sext <4 x i8> %X to <4 x i32> 833 ret <4 x i32> %Y 834} 835 836define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { 837; SSE2-LABEL: load_sext_4i1_to_4i64: 838; SSE2: # BB#0: # %entry 839; SSE2-NEXT: movzbl (%rdi), %eax 840; SSE2-NEXT: movl %eax, %ecx 841; SSE2-NEXT: shrl $3, %ecx 842; SSE2-NEXT: andl $1, %ecx 843; SSE2-NEXT: movd %ecx, %xmm0 844; SSE2-NEXT: movl %eax, %ecx 845; 
SSE2-NEXT: shrl %ecx 846; SSE2-NEXT: andl $1, %ecx 847; SSE2-NEXT: movd %ecx, %xmm1 848; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 849; SSE2-NEXT: movl %eax, %ecx 850; SSE2-NEXT: andl $1, %ecx 851; SSE2-NEXT: movd %ecx, %xmm2 852; SSE2-NEXT: shrl $2, %eax 853; SSE2-NEXT: andl $1, %eax 854; SSE2-NEXT: movd %eax, %xmm0 855; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 856; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 857; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 858; SSE2-NEXT: psllq $63, %xmm0 859; SSE2-NEXT: psrad $31, %xmm0 860; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 861; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 862; SSE2-NEXT: psllq $63, %xmm1 863; SSE2-NEXT: psrad $31, %xmm1 864; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 865; SSE2-NEXT: retq 866; 867; SSSE3-LABEL: load_sext_4i1_to_4i64: 868; SSSE3: # BB#0: # %entry 869; SSSE3-NEXT: movzbl (%rdi), %eax 870; SSSE3-NEXT: movl %eax, %ecx 871; SSSE3-NEXT: shrl $3, %ecx 872; SSSE3-NEXT: andl $1, %ecx 873; SSSE3-NEXT: movd %ecx, %xmm0 874; SSSE3-NEXT: movl %eax, %ecx 875; SSSE3-NEXT: shrl %ecx 876; SSSE3-NEXT: andl $1, %ecx 877; SSSE3-NEXT: movd %ecx, %xmm1 878; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 879; SSSE3-NEXT: movl %eax, %ecx 880; SSSE3-NEXT: andl $1, %ecx 881; SSSE3-NEXT: movd %ecx, %xmm2 882; SSSE3-NEXT: shrl $2, %eax 883; SSSE3-NEXT: andl $1, %eax 884; SSSE3-NEXT: movd %eax, %xmm0 885; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 886; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 887; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 888; SSSE3-NEXT: psllq $63, %xmm0 889; SSSE3-NEXT: psrad $31, %xmm0 890; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 891; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 892; SSSE3-NEXT: psllq $63, %xmm1 893; SSSE3-NEXT: psrad $31, %xmm1 894; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 895; SSSE3-NEXT: retq 896; 897; SSE41-LABEL: load_sext_4i1_to_4i64: 898; SSE41: # BB#0: # %entry 899; SSE41-NEXT: movzbl (%rdi), %eax 900; SSE41-NEXT: movl %eax, %ecx 901; SSE41-NEXT: shrl %ecx 902; SSE41-NEXT: andl $1, %ecx 903; SSE41-NEXT: movl %eax, %edx 904; SSE41-NEXT: andl $1, %edx 905; SSE41-NEXT: movd %edx, %xmm1 906; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 907; SSE41-NEXT: movl %eax, %ecx 908; SSE41-NEXT: shrl $2, %ecx 909; SSE41-NEXT: andl $1, %ecx 910; SSE41-NEXT: pinsrd $2, %ecx, %xmm1 911; SSE41-NEXT: shrl $3, %eax 912; SSE41-NEXT: andl $1, %eax 913; SSE41-NEXT: pinsrd $3, %eax, %xmm1 914; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 915; SSE41-NEXT: psllq $63, %xmm0 916; SSE41-NEXT: psrad $31, %xmm0 917; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 918; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 919; SSE41-NEXT: psllq $63, %xmm1 920; SSE41-NEXT: psrad $31, %xmm1 921; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 922; SSE41-NEXT: retq 923; 924; AVX1-LABEL: load_sext_4i1_to_4i64: 925; AVX1: # BB#0: # %entry 926; AVX1-NEXT: movzbl (%rdi), %eax 927; AVX1-NEXT: movq %rax, %rcx 928; AVX1-NEXT: shlq $62, %rcx 929; AVX1-NEXT: sarq $63, %rcx 930; AVX1-NEXT: movq %rax, %rdx 931; AVX1-NEXT: shlq $63, %rdx 932; AVX1-NEXT: sarq $63, %rdx 933; AVX1-NEXT: vmovd %edx, %xmm0 934; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 935; AVX1-NEXT: movq %rax, %rcx 936; AVX1-NEXT: shlq $61, %rcx 937; AVX1-NEXT: sarq $63, %rcx 938; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 939; AVX1-NEXT: shlq $60, %rax 940; AVX1-NEXT: sarq 
$63, %rax 941; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 942; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 943; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 944; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 945; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 946; AVX1-NEXT: retq 947; 948; AVX2-LABEL: load_sext_4i1_to_4i64: 949; AVX2: # BB#0: # %entry 950; AVX2-NEXT: movzbl (%rdi), %eax 951; AVX2-NEXT: movq %rax, %rcx 952; AVX2-NEXT: shlq $60, %rcx 953; AVX2-NEXT: sarq $63, %rcx 954; AVX2-NEXT: vmovq %rcx, %xmm0 955; AVX2-NEXT: movq %rax, %rcx 956; AVX2-NEXT: shlq $61, %rcx 957; AVX2-NEXT: sarq $63, %rcx 958; AVX2-NEXT: vmovq %rcx, %xmm1 959; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 960; AVX2-NEXT: movq %rax, %rcx 961; AVX2-NEXT: shlq $62, %rcx 962; AVX2-NEXT: sarq $63, %rcx 963; AVX2-NEXT: vmovq %rcx, %xmm1 964; AVX2-NEXT: shlq $63, %rax 965; AVX2-NEXT: sarq $63, %rax 966; AVX2-NEXT: vmovq %rax, %xmm2 967; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 968; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 969; AVX2-NEXT: retq 970; 971; X32-SSE41-LABEL: load_sext_4i1_to_4i64: 972; X32-SSE41: # BB#0: # %entry 973; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 974; X32-SSE41-NEXT: movzbl (%eax), %eax 975; X32-SSE41-NEXT: movl %eax, %ecx 976; X32-SSE41-NEXT: shrl %ecx 977; X32-SSE41-NEXT: andl $1, %ecx 978; X32-SSE41-NEXT: movl %eax, %edx 979; X32-SSE41-NEXT: andl $1, %edx 980; X32-SSE41-NEXT: movd %edx, %xmm1 981; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm1 982; X32-SSE41-NEXT: movl %eax, %ecx 983; X32-SSE41-NEXT: shrl $2, %ecx 984; X32-SSE41-NEXT: andl $1, %ecx 985; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm1 986; X32-SSE41-NEXT: shrl $3, %eax 987; X32-SSE41-NEXT: andl $1, %eax 988; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1 989; X32-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 990; X32-SSE41-NEXT: psllq $63, %xmm0 991; X32-SSE41-NEXT: psrad $31, %xmm0 992; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 993; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 994; X32-SSE41-NEXT: psllq $63, %xmm1 995; X32-SSE41-NEXT: psrad $31, %xmm1 996; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 997; X32-SSE41-NEXT: retl 998entry: 999 %X = load <4 x i1>, <4 x i1>* %ptr 1000 %Y = sext <4 x i1> %X to <4 x i64> 1001 ret <4 x i64> %Y 1002} 1003 1004define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { 1005; SSE2-LABEL: load_sext_4i8_to_4i64: 1006; SSE2: # BB#0: # %entry 1007; SSE2-NEXT: movsbq 1(%rdi), %rax 1008; SSE2-NEXT: movd %rax, %xmm1 1009; SSE2-NEXT: movsbq (%rdi), %rax 1010; SSE2-NEXT: movd %rax, %xmm0 1011; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1012; SSE2-NEXT: movsbq 3(%rdi), %rax 1013; SSE2-NEXT: movd %rax, %xmm2 1014; SSE2-NEXT: movsbq 2(%rdi), %rax 1015; SSE2-NEXT: movd %rax, %xmm1 1016; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1017; SSE2-NEXT: retq 1018; 1019; SSSE3-LABEL: load_sext_4i8_to_4i64: 1020; SSSE3: # BB#0: # %entry 1021; SSSE3-NEXT: movsbq 1(%rdi), %rax 1022; SSSE3-NEXT: movd %rax, %xmm1 1023; SSSE3-NEXT: movsbq (%rdi), %rax 1024; SSSE3-NEXT: movd %rax, %xmm0 1025; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1026; SSSE3-NEXT: movsbq 3(%rdi), %rax 1027; SSSE3-NEXT: movd %rax, %xmm2 1028; SSSE3-NEXT: movsbq 2(%rdi), %rax 1029; SSSE3-NEXT: movd %rax, %xmm1 1030; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1031; SSSE3-NEXT: retq 1032; 1033; SSE41-LABEL: load_sext_4i8_to_4i64: 1034; SSE41: # BB#0: # %entry 1035; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1036; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1037; SSE41-NEXT: retq 1038; 
1039; AVX1-LABEL: load_sext_4i8_to_4i64: 1040; AVX1: # BB#0: # %entry 1041; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 1042; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1043; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1044; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1045; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1046; AVX1-NEXT: retq 1047; 1048; AVX2-LABEL: load_sext_4i8_to_4i64: 1049; AVX2: # BB#0: # %entry 1050; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 1051; AVX2-NEXT: retq 1052; 1053; X32-SSE41-LABEL: load_sext_4i8_to_4i64: 1054; X32-SSE41: # BB#0: # %entry 1055; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1056; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1057; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 1058; X32-SSE41-NEXT: retl 1059entry: 1060 %X = load <4 x i8>, <4 x i8>* %ptr 1061 %Y = sext <4 x i8> %X to <4 x i64> 1062 ret <4 x i64> %Y 1063} 1064 1065define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { 1066; SSE2-LABEL: load_sext_8i1_to_8i16: 1067; SSE2: # BB#0: # %entry 1068; SSE2-NEXT: movsbq (%rdi), %rax 1069; SSE2-NEXT: movq %rax, %rcx 1070; SSE2-NEXT: shrq $7, %rcx 1071; SSE2-NEXT: movd %ecx, %xmm0 1072; SSE2-NEXT: movq %rax, %rcx 1073; SSE2-NEXT: shlq $60, %rcx 1074; SSE2-NEXT: sarq $63, %rcx 1075; SSE2-NEXT: movd %ecx, %xmm2 1076; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1077; SSE2-NEXT: movq %rax, %rcx 1078; SSE2-NEXT: shlq $58, %rcx 1079; SSE2-NEXT: sarq $63, %rcx 1080; SSE2-NEXT: movd %ecx, %xmm0 1081; SSE2-NEXT: movq %rax, %rcx 1082; SSE2-NEXT: shlq $62, %rcx 1083; SSE2-NEXT: sarq $63, %rcx 1084; SSE2-NEXT: movd %ecx, %xmm1 1085; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1086; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1087; SSE2-NEXT: movq %rax, %rcx 1088; SSE2-NEXT: shlq $57, %rcx 1089; SSE2-NEXT: sarq $63, %rcx 1090; SSE2-NEXT: movd %ecx, %xmm0 1091; SSE2-NEXT: movq %rax, %rcx 1092; SSE2-NEXT: shlq $61, %rcx 1093; SSE2-NEXT: sarq $63, %rcx 1094; SSE2-NEXT: movd %ecx, %xmm2 1095; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1096; SSE2-NEXT: movq %rax, %rcx 1097; SSE2-NEXT: shlq $59, %rcx 1098; SSE2-NEXT: sarq $63, %rcx 1099; SSE2-NEXT: movd %ecx, %xmm3 1100; SSE2-NEXT: shlq $63, %rax 1101; SSE2-NEXT: sarq $63, %rax 1102; SSE2-NEXT: movd %eax, %xmm0 1103; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1104; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1105; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1106; SSE2-NEXT: retq 1107; 1108; SSSE3-LABEL: load_sext_8i1_to_8i16: 1109; SSSE3: # BB#0: # %entry 1110; SSSE3-NEXT: movsbq (%rdi), %rax 1111; SSSE3-NEXT: movq %rax, %rcx 1112; SSSE3-NEXT: shrq $7, %rcx 1113; SSSE3-NEXT: movd %ecx, %xmm0 1114; SSSE3-NEXT: movq %rax, %rcx 1115; SSSE3-NEXT: shlq $60, %rcx 1116; SSSE3-NEXT: sarq $63, %rcx 1117; SSSE3-NEXT: movd %ecx, %xmm2 1118; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1119; SSSE3-NEXT: movq %rax, %rcx 1120; SSSE3-NEXT: shlq $58, %rcx 1121; SSSE3-NEXT: sarq $63, %rcx 1122; SSSE3-NEXT: movd %ecx, %xmm0 1123; SSSE3-NEXT: movq %rax, %rcx 1124; SSSE3-NEXT: shlq $62, %rcx 1125; SSSE3-NEXT: sarq $63, %rcx 1126; SSSE3-NEXT: movd %ecx, %xmm1 1127; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1128; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1129; SSSE3-NEXT: movq %rax, %rcx 1130; SSSE3-NEXT: shlq $57, %rcx 1131; SSSE3-NEXT: sarq $63, %rcx 1132; SSSE3-NEXT: movd %ecx, %xmm0 1133; SSSE3-NEXT: movq %rax, %rcx 1134; SSSE3-NEXT: shlq $61, %rcx 1135; SSSE3-NEXT: sarq $63, %rcx 1136; SSSE3-NEXT: movd %ecx, %xmm2 1137; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1138; SSSE3-NEXT: movq %rax, %rcx 1139; SSSE3-NEXT: shlq $59, %rcx 1140; SSSE3-NEXT: sarq $63, %rcx 1141; SSSE3-NEXT: movd %ecx, %xmm3 1142; SSSE3-NEXT: shlq $63, %rax 1143; SSSE3-NEXT: sarq $63, %rax 1144; SSSE3-NEXT: movd %eax, %xmm0 1145; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1146; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1147; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1148; SSSE3-NEXT: retq 1149; 1150; SSE41-LABEL: load_sext_8i1_to_8i16: 1151; SSE41: # BB#0: # %entry 1152; SSE41-NEXT: movsbq (%rdi), %rax 1153; SSE41-NEXT: movq %rax, %rcx 1154; SSE41-NEXT: shlq $62, %rcx 1155; SSE41-NEXT: sarq $63, %rcx 1156; SSE41-NEXT: movq %rax, %rdx 1157; SSE41-NEXT: shlq $63, %rdx 1158; SSE41-NEXT: sarq $63, %rdx 1159; SSE41-NEXT: movd %edx, %xmm0 1160; SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1161; SSE41-NEXT: movq %rax, %rcx 1162; SSE41-NEXT: shlq $61, %rcx 1163; SSE41-NEXT: sarq $63, %rcx 1164; SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1165; SSE41-NEXT: movq %rax, %rcx 1166; SSE41-NEXT: shlq $60, %rcx 1167; SSE41-NEXT: sarq $63, %rcx 1168; SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1169; SSE41-NEXT: movq %rax, %rcx 1170; SSE41-NEXT: shlq $59, %rcx 1171; SSE41-NEXT: sarq $63, %rcx 1172; SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1173; SSE41-NEXT: movq %rax, %rcx 1174; SSE41-NEXT: shlq $58, %rcx 1175; SSE41-NEXT: sarq $63, %rcx 1176; SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1177; SSE41-NEXT: movq %rax, %rcx 1178; SSE41-NEXT: shlq $57, %rcx 1179; SSE41-NEXT: sarq $63, %rcx 1180; SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1181; SSE41-NEXT: shrq $7, %rax 1182; SSE41-NEXT: pinsrw $7, %eax, %xmm0 1183; SSE41-NEXT: retq 1184; 1185; AVX-LABEL: load_sext_8i1_to_8i16: 1186; AVX: # BB#0: # %entry 1187; AVX-NEXT: movsbq (%rdi), %rax 1188; AVX-NEXT: movq %rax, %rcx 1189; AVX-NEXT: shlq $62, %rcx 1190; AVX-NEXT: sarq $63, %rcx 1191; AVX-NEXT: movq %rax, %rdx 1192; AVX-NEXT: shlq $63, %rdx 1193; AVX-NEXT: sarq $63, %rdx 1194; AVX-NEXT: vmovd %edx, %xmm0 1195; AVX-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 1196; AVX-NEXT: movq %rax, %rcx 1197; AVX-NEXT: shlq $61, %rcx 1198; AVX-NEXT: sarq $63, %rcx 1199; AVX-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1200; AVX-NEXT: movq %rax, %rcx 1201; AVX-NEXT: shlq $60, %rcx 1202; AVX-NEXT: sarq $63, %rcx 1203; AVX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1204; AVX-NEXT: movq %rax, %rcx 1205; AVX-NEXT: shlq $59, %rcx 1206; AVX-NEXT: sarq $63, %rcx 1207; AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1208; AVX-NEXT: movq %rax, %rcx 1209; AVX-NEXT: shlq $58, %rcx 1210; AVX-NEXT: sarq $63, %rcx 1211; AVX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1212; AVX-NEXT: movq %rax, %rcx 1213; AVX-NEXT: shlq $57, %rcx 1214; AVX-NEXT: sarq $63, %rcx 1215; AVX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1216; AVX-NEXT: shrq $7, %rax 1217; AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1218; AVX-NEXT: retq 1219; 1220; X32-SSE41-LABEL: 
load_sext_8i1_to_8i16: 1221; X32-SSE41: # BB#0: # %entry 1222; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1223; X32-SSE41-NEXT: movsbl (%eax), %eax 1224; X32-SSE41-NEXT: movl %eax, %ecx 1225; X32-SSE41-NEXT: shll $30, %ecx 1226; X32-SSE41-NEXT: sarl $31, %ecx 1227; X32-SSE41-NEXT: movl %eax, %edx 1228; X32-SSE41-NEXT: shll $31, %edx 1229; X32-SSE41-NEXT: sarl $31, %edx 1230; X32-SSE41-NEXT: movd %edx, %xmm0 1231; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1232; X32-SSE41-NEXT: movl %eax, %ecx 1233; X32-SSE41-NEXT: shll $29, %ecx 1234; X32-SSE41-NEXT: sarl $31, %ecx 1235; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1236; X32-SSE41-NEXT: movl %eax, %ecx 1237; X32-SSE41-NEXT: shll $28, %ecx 1238; X32-SSE41-NEXT: sarl $31, %ecx 1239; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1240; X32-SSE41-NEXT: movl %eax, %ecx 1241; X32-SSE41-NEXT: shll $27, %ecx 1242; X32-SSE41-NEXT: sarl $31, %ecx 1243; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1244; X32-SSE41-NEXT: movl %eax, %ecx 1245; X32-SSE41-NEXT: shll $26, %ecx 1246; X32-SSE41-NEXT: sarl $31, %ecx 1247; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1248; X32-SSE41-NEXT: movl %eax, %ecx 1249; X32-SSE41-NEXT: shll $25, %ecx 1250; X32-SSE41-NEXT: sarl $31, %ecx 1251; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1252; X32-SSE41-NEXT: shrl $7, %eax 1253; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm0 1254; X32-SSE41-NEXT: retl 1255entry: 1256 %X = load <8 x i1>, <8 x i1>* %ptr 1257 %Y = sext <8 x i1> %X to <8 x i16> 1258 ret <8 x i16> %Y 1259} 1260 1261define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { 1262; SSE2-LABEL: load_sext_8i8_to_8i16: 1263; SSE2: # BB#0: # %entry 1264; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1265; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1266; SSE2-NEXT: psraw $8, %xmm0 1267; SSE2-NEXT: retq 1268; 1269; SSSE3-LABEL: load_sext_8i8_to_8i16: 1270; SSSE3: # BB#0: # %entry 1271; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1272; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1273; SSSE3-NEXT: psraw $8, %xmm0 1274; SSSE3-NEXT: retq 1275; 1276; SSE41-LABEL: load_sext_8i8_to_8i16: 1277; SSE41: # BB#0: # %entry 1278; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 1279; SSE41-NEXT: retq 1280; 1281; AVX-LABEL: load_sext_8i8_to_8i16: 1282; AVX: # BB#0: # %entry 1283; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 1284; AVX-NEXT: retq 1285; 1286; X32-SSE41-LABEL: load_sext_8i8_to_8i16: 1287; X32-SSE41: # BB#0: # %entry 1288; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1289; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 1290; X32-SSE41-NEXT: retl 1291entry: 1292 %X = load <8 x i8>, <8 x i8>* %ptr 1293 %Y = sext <8 x i8> %X to <8 x i16> 1294 ret <8 x i16> %Y 1295} 1296 1297define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { 1298; SSE2-LABEL: load_sext_8i1_to_8i32: 1299; SSE2: # BB#0: # %entry 1300; SSE2-NEXT: movzbl (%rdi), %eax 1301; SSE2-NEXT: movl %eax, %ecx 1302; SSE2-NEXT: shrl $6, %ecx 1303; SSE2-NEXT: andl $1, %ecx 1304; SSE2-NEXT: movd %ecx, %xmm0 1305; SSE2-NEXT: movl %eax, %ecx 1306; SSE2-NEXT: shrl $2, %ecx 1307; SSE2-NEXT: andl $1, %ecx 1308; SSE2-NEXT: movd %ecx, %xmm2 1309; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1310; SSE2-NEXT: movl %eax, %ecx 1311; SSE2-NEXT: andl $1, %ecx 1312; SSE2-NEXT: movd %ecx, %xmm1 1313; SSE2-NEXT: movl %eax, %ecx 1314; SSE2-NEXT: shrl $4, %ecx 1315; SSE2-NEXT: andl $1, %ecx 1316; SSE2-NEXT: movd %ecx, %xmm0 1317; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1318; SSE2-NEXT: 
punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1319; SSE2-NEXT: movl %eax, %ecx 1320; SSE2-NEXT: shrl $5, %ecx 1321; SSE2-NEXT: andl $1, %ecx 1322; SSE2-NEXT: movd %ecx, %xmm0 1323; SSE2-NEXT: movl %eax, %ecx 1324; SSE2-NEXT: shrl %ecx 1325; SSE2-NEXT: andl $1, %ecx 1326; SSE2-NEXT: movd %ecx, %xmm2 1327; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1328; SSE2-NEXT: movl %eax, %ecx 1329; SSE2-NEXT: shrl $3, %ecx 1330; SSE2-NEXT: andl $1, %ecx 1331; SSE2-NEXT: movd %ecx, %xmm0 1332; SSE2-NEXT: shrl $7, %eax 1333; SSE2-NEXT: movzwl %ax, %eax 1334; SSE2-NEXT: movd %eax, %xmm3 1335; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1336; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1337; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1338; SSE2-NEXT: movdqa %xmm1, %xmm0 1339; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1340; SSE2-NEXT: pslld $31, %xmm0 1341; SSE2-NEXT: psrad $31, %xmm0 1342; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1343; SSE2-NEXT: pslld $31, %xmm1 1344; SSE2-NEXT: psrad $31, %xmm1 1345; SSE2-NEXT: retq 1346; 1347; SSSE3-LABEL: load_sext_8i1_to_8i32: 1348; SSSE3: # BB#0: # %entry 1349; SSSE3-NEXT: movzbl (%rdi), %eax 1350; SSSE3-NEXT: movl %eax, %ecx 1351; SSSE3-NEXT: shrl $6, %ecx 1352; SSSE3-NEXT: andl $1, %ecx 1353; SSSE3-NEXT: movd %ecx, %xmm0 1354; SSSE3-NEXT: movl %eax, %ecx 1355; SSSE3-NEXT: shrl $2, %ecx 1356; SSSE3-NEXT: andl $1, %ecx 1357; SSSE3-NEXT: movd %ecx, %xmm2 1358; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1359; SSSE3-NEXT: movl %eax, %ecx 1360; SSSE3-NEXT: andl $1, %ecx 1361; SSSE3-NEXT: movd %ecx, %xmm1 1362; SSSE3-NEXT: movl %eax, %ecx 1363; SSSE3-NEXT: shrl $4, %ecx 1364; SSSE3-NEXT: andl $1, %ecx 1365; SSSE3-NEXT: movd %ecx, %xmm0 1366; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1367; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1368; SSSE3-NEXT: movl %eax, %ecx 1369; SSSE3-NEXT: shrl $5, %ecx 1370; SSSE3-NEXT: andl $1, %ecx 1371; SSSE3-NEXT: movd %ecx, %xmm0 1372; SSSE3-NEXT: movl %eax, %ecx 1373; SSSE3-NEXT: shrl %ecx 1374; SSSE3-NEXT: andl $1, %ecx 1375; SSSE3-NEXT: movd %ecx, %xmm2 1376; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1377; SSSE3-NEXT: movl %eax, %ecx 1378; SSSE3-NEXT: shrl $3, %ecx 1379; SSSE3-NEXT: andl $1, %ecx 1380; SSSE3-NEXT: movd %ecx, %xmm0 1381; SSSE3-NEXT: shrl $7, %eax 1382; SSSE3-NEXT: movzwl %ax, %eax 1383; SSSE3-NEXT: movd %eax, %xmm3 1384; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1385; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1386; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1387; SSSE3-NEXT: movdqa %xmm1, %xmm0 1388; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1389; SSSE3-NEXT: pslld $31, %xmm0 1390; SSSE3-NEXT: psrad $31, %xmm0 1391; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1392; SSSE3-NEXT: pslld $31, %xmm1 
1393; SSSE3-NEXT: psrad $31, %xmm1 1394; SSSE3-NEXT: retq 1395; 1396; SSE41-LABEL: load_sext_8i1_to_8i32: 1397; SSE41: # BB#0: # %entry 1398; SSE41-NEXT: movzbl (%rdi), %eax 1399; SSE41-NEXT: movl %eax, %ecx 1400; SSE41-NEXT: shrl %ecx 1401; SSE41-NEXT: andl $1, %ecx 1402; SSE41-NEXT: movl %eax, %edx 1403; SSE41-NEXT: andl $1, %edx 1404; SSE41-NEXT: movd %edx, %xmm1 1405; SSE41-NEXT: pinsrw $1, %ecx, %xmm1 1406; SSE41-NEXT: movl %eax, %ecx 1407; SSE41-NEXT: shrl $2, %ecx 1408; SSE41-NEXT: andl $1, %ecx 1409; SSE41-NEXT: pinsrw $2, %ecx, %xmm1 1410; SSE41-NEXT: movl %eax, %ecx 1411; SSE41-NEXT: shrl $3, %ecx 1412; SSE41-NEXT: andl $1, %ecx 1413; SSE41-NEXT: pinsrw $3, %ecx, %xmm1 1414; SSE41-NEXT: movl %eax, %ecx 1415; SSE41-NEXT: shrl $4, %ecx 1416; SSE41-NEXT: andl $1, %ecx 1417; SSE41-NEXT: pinsrw $4, %ecx, %xmm1 1418; SSE41-NEXT: movl %eax, %ecx 1419; SSE41-NEXT: shrl $5, %ecx 1420; SSE41-NEXT: andl $1, %ecx 1421; SSE41-NEXT: pinsrw $5, %ecx, %xmm1 1422; SSE41-NEXT: movl %eax, %ecx 1423; SSE41-NEXT: shrl $6, %ecx 1424; SSE41-NEXT: andl $1, %ecx 1425; SSE41-NEXT: pinsrw $6, %ecx, %xmm1 1426; SSE41-NEXT: shrl $7, %eax 1427; SSE41-NEXT: movzwl %ax, %eax 1428; SSE41-NEXT: pinsrw $7, %eax, %xmm1 1429; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1430; SSE41-NEXT: pslld $31, %xmm0 1431; SSE41-NEXT: psrad $31, %xmm0 1432; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1433; SSE41-NEXT: pslld $31, %xmm1 1434; SSE41-NEXT: psrad $31, %xmm1 1435; SSE41-NEXT: retq 1436; 1437; AVX1-LABEL: load_sext_8i1_to_8i32: 1438; AVX1: # BB#0: # %entry 1439; AVX1-NEXT: movsbq (%rdi), %rax 1440; AVX1-NEXT: movq %rax, %rcx 1441; AVX1-NEXT: shlq $58, %rcx 1442; AVX1-NEXT: sarq $63, %rcx 1443; AVX1-NEXT: movq %rax, %rdx 1444; AVX1-NEXT: shlq $59, %rdx 1445; AVX1-NEXT: sarq $63, %rdx 1446; AVX1-NEXT: vmovd %edx, %xmm0 1447; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1448; AVX1-NEXT: movq %rax, %rcx 1449; AVX1-NEXT: shlq $57, %rcx 1450; AVX1-NEXT: sarq $63, %rcx 1451; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1452; AVX1-NEXT: movq %rax, %rcx 1453; AVX1-NEXT: shrq $7, %rcx 1454; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 1455; AVX1-NEXT: movq %rax, %rcx 1456; AVX1-NEXT: shlq $62, %rcx 1457; AVX1-NEXT: sarq $63, %rcx 1458; AVX1-NEXT: movq %rax, %rdx 1459; AVX1-NEXT: shlq $63, %rdx 1460; AVX1-NEXT: sarq $63, %rdx 1461; AVX1-NEXT: vmovd %edx, %xmm1 1462; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 1463; AVX1-NEXT: movq %rax, %rcx 1464; AVX1-NEXT: shlq $61, %rcx 1465; AVX1-NEXT: sarq $63, %rcx 1466; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 1467; AVX1-NEXT: shlq $60, %rax 1468; AVX1-NEXT: sarq $63, %rax 1469; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 1470; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1471; AVX1-NEXT: retq 1472; 1473; AVX2-LABEL: load_sext_8i1_to_8i32: 1474; AVX2: # BB#0: # %entry 1475; AVX2-NEXT: movsbq (%rdi), %rax 1476; AVX2-NEXT: movq %rax, %rcx 1477; AVX2-NEXT: shlq $58, %rcx 1478; AVX2-NEXT: sarq $63, %rcx 1479; AVX2-NEXT: movq %rax, %rdx 1480; AVX2-NEXT: shlq $59, %rdx 1481; AVX2-NEXT: sarq $63, %rdx 1482; AVX2-NEXT: vmovd %edx, %xmm0 1483; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1484; AVX2-NEXT: movq %rax, %rcx 1485; AVX2-NEXT: shlq $57, %rcx 1486; AVX2-NEXT: sarq $63, %rcx 1487; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1488; AVX2-NEXT: movq %rax, %rcx 1489; AVX2-NEXT: shrq $7, %rcx 1490; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 1491; AVX2-NEXT: movq %rax, %rcx 1492; AVX2-NEXT: shlq $62, %rcx 1493; 
AVX2-NEXT: sarq $63, %rcx 1494; AVX2-NEXT: movq %rax, %rdx 1495; AVX2-NEXT: shlq $63, %rdx 1496; AVX2-NEXT: sarq $63, %rdx 1497; AVX2-NEXT: vmovd %edx, %xmm1 1498; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 1499; AVX2-NEXT: movq %rax, %rcx 1500; AVX2-NEXT: shlq $61, %rcx 1501; AVX2-NEXT: sarq $63, %rcx 1502; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 1503; AVX2-NEXT: shlq $60, %rax 1504; AVX2-NEXT: sarq $63, %rax 1505; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 1506; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1507; AVX2-NEXT: retq 1508; 1509; X32-SSE41-LABEL: load_sext_8i1_to_8i32: 1510; X32-SSE41: # BB#0: # %entry 1511; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1512; X32-SSE41-NEXT: movzbl (%eax), %eax 1513; X32-SSE41-NEXT: movl %eax, %ecx 1514; X32-SSE41-NEXT: shrl %ecx 1515; X32-SSE41-NEXT: andl $1, %ecx 1516; X32-SSE41-NEXT: movl %eax, %edx 1517; X32-SSE41-NEXT: andl $1, %edx 1518; X32-SSE41-NEXT: movd %edx, %xmm1 1519; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm1 1520; X32-SSE41-NEXT: movl %eax, %ecx 1521; X32-SSE41-NEXT: shrl $2, %ecx 1522; X32-SSE41-NEXT: andl $1, %ecx 1523; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm1 1524; X32-SSE41-NEXT: movl %eax, %ecx 1525; X32-SSE41-NEXT: shrl $3, %ecx 1526; X32-SSE41-NEXT: andl $1, %ecx 1527; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm1 1528; X32-SSE41-NEXT: movl %eax, %ecx 1529; X32-SSE41-NEXT: shrl $4, %ecx 1530; X32-SSE41-NEXT: andl $1, %ecx 1531; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm1 1532; X32-SSE41-NEXT: movl %eax, %ecx 1533; X32-SSE41-NEXT: shrl $5, %ecx 1534; X32-SSE41-NEXT: andl $1, %ecx 1535; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm1 1536; X32-SSE41-NEXT: movl %eax, %ecx 1537; X32-SSE41-NEXT: shrl $6, %ecx 1538; X32-SSE41-NEXT: andl $1, %ecx 1539; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm1 1540; X32-SSE41-NEXT: shrl $7, %eax 1541; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm1 1542; X32-SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1543; X32-SSE41-NEXT: pslld $31, %xmm0 1544; X32-SSE41-NEXT: psrad $31, %xmm0 1545; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1546; X32-SSE41-NEXT: pslld $31, %xmm1 1547; X32-SSE41-NEXT: psrad $31, %xmm1 1548; X32-SSE41-NEXT: retl 1549entry: 1550 %X = load <8 x i1>, <8 x i1>* %ptr 1551 %Y = sext <8 x i1> %X to <8 x i32> 1552 ret <8 x i32> %Y 1553} 1554 1555define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { 1556; SSE2-LABEL: load_sext_8i8_to_8i32: 1557; SSE2: # BB#0: # %entry 1558; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1559; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1560; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1561; SSE2-NEXT: psrad $24, %xmm0 1562; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1563; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1564; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 1565; SSE2-NEXT: psrad $24, %xmm1 1566; SSE2-NEXT: retq 1567; 1568; SSSE3-LABEL: load_sext_8i8_to_8i32: 1569; SSSE3: # BB#0: # %entry 1570; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1571; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1572; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1573; SSSE3-NEXT: psrad $24, %xmm0 1574; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1575; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1576; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 1577; SSSE3-NEXT: psrad $24, %xmm1 
1578; SSSE3-NEXT: retq 1579; 1580; SSE41-LABEL: load_sext_8i8_to_8i32: 1581; SSE41: # BB#0: # %entry 1582; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1583; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 1584; SSE41-NEXT: retq 1585; 1586; AVX1-LABEL: load_sext_8i8_to_8i32: 1587; AVX1: # BB#0: # %entry 1588; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 1589; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1590; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1591; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1592; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1593; AVX1-NEXT: retq 1594; 1595; AVX2-LABEL: load_sext_8i8_to_8i32: 1596; AVX2: # BB#0: # %entry 1597; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 1598; AVX2-NEXT: retq 1599; 1600; X32-SSE41-LABEL: load_sext_8i8_to_8i32: 1601; X32-SSE41: # BB#0: # %entry 1602; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1603; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1604; X32-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 1605; X32-SSE41-NEXT: retl 1606entry: 1607 %X = load <8 x i8>, <8 x i8>* %ptr 1608 %Y = sext <8 x i8> %X to <8 x i32> 1609 ret <8 x i32> %Y 1610} 1611 1612define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { 1613; SSE2-LABEL: load_sext_16i1_to_16i8: 1614; SSE2: # BB#0: # %entry 1615; SSE2-NEXT: pushq %rbp 1616; SSE2-NEXT: pushq %r15 1617; SSE2-NEXT: pushq %r14 1618; SSE2-NEXT: pushq %r13 1619; SSE2-NEXT: pushq %r12 1620; SSE2-NEXT: pushq %rbx 1621; SSE2-NEXT: movswq (%rdi), %rax 1622; SSE2-NEXT: movq %rax, %r8 1623; SSE2-NEXT: movq %rax, %r9 1624; SSE2-NEXT: movq %rax, %r10 1625; SSE2-NEXT: movq %rax, %r11 1626; SSE2-NEXT: movq %rax, %r14 1627; SSE2-NEXT: movq %rax, %r15 1628; SSE2-NEXT: movq %rax, %r12 1629; SSE2-NEXT: movq %rax, %r13 1630; SSE2-NEXT: movq %rax, %rbx 1631; SSE2-NEXT: movq %rax, %rcx 1632; SSE2-NEXT: movq %rax, %rdx 1633; SSE2-NEXT: movq %rax, %rsi 1634; SSE2-NEXT: movq %rax, %rdi 1635; SSE2-NEXT: movq %rax, %rbp 1636; SSE2-NEXT: shlq $49, %rbp 1637; SSE2-NEXT: sarq $63, %rbp 1638; SSE2-NEXT: movd %ebp, %xmm0 1639; SSE2-NEXT: movq %rax, %rbp 1640; SSE2-NEXT: movsbq %al, %rax 1641; SSE2-NEXT: shlq $57, %r8 1642; SSE2-NEXT: sarq $63, %r8 1643; SSE2-NEXT: movd %r8d, %xmm1 1644; SSE2-NEXT: shlq $53, %r9 1645; SSE2-NEXT: sarq $63, %r9 1646; SSE2-NEXT: movd %r9d, %xmm2 1647; SSE2-NEXT: shlq $61, %r10 1648; SSE2-NEXT: sarq $63, %r10 1649; SSE2-NEXT: movd %r10d, %xmm3 1650; SSE2-NEXT: shlq $51, %r11 1651; SSE2-NEXT: sarq $63, %r11 1652; SSE2-NEXT: movd %r11d, %xmm4 1653; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1654; SSE2-NEXT: shlq $59, %r14 1655; SSE2-NEXT: sarq $63, %r14 1656; SSE2-NEXT: movd %r14d, %xmm5 1657; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1658; SSE2-NEXT: shlq $55, %r15 1659; SSE2-NEXT: sarq $63, %r15 1660; SSE2-NEXT: movd %r15d, %xmm2 1661; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 1662; SSE2-NEXT: shlq $63, %r12 1663; SSE2-NEXT: sarq $63, %r12 1664; SSE2-NEXT: movd %r12d, %xmm0 1665; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 1666; SSE2-NEXT: shlq $50, %r13 1667; SSE2-NEXT: sarq $63, %r13 1668; SSE2-NEXT: movd %r13d, %xmm1 1669; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1670; SSE2-NEXT: shlq $58, %rbx 1671; SSE2-NEXT: sarq $63, %rbx 1672; SSE2-NEXT: movd %ebx, %xmm2 1673; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 1674; SSE2-NEXT: shlq $54, %rcx 1675; SSE2-NEXT: sarq $63, %rcx 1676; SSE2-NEXT: movd %ecx, %xmm4 1677; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 1678; SSE2-NEXT: shlq $62, %rdx 1679; SSE2-NEXT: sarq $63, %rdx 1680; SSE2-NEXT: movd %edx, %xmm3 1681; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 1682; SSE2-NEXT: shlq $52, %rsi 1683; SSE2-NEXT: sarq $63, %rsi 1684; SSE2-NEXT: movd %esi, %xmm1 1685; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1686; SSE2-NEXT: shlq $60, %rdi 1687; SSE2-NEXT: sarq $63, %rdi 1688; SSE2-NEXT: movd %edi, %xmm4 1689; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1690; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 1691; SSE2-NEXT: shrq $15, %rbp 1692; SSE2-NEXT: movd %ebp, %xmm1 1693; SSE2-NEXT: shrq $7, %rax 1694; SSE2-NEXT: movd %eax, %xmm2 1695; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 1696; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 1697; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1698; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 1699; SSE2-NEXT: popq %rbx 1700; SSE2-NEXT: popq %r12 1701; SSE2-NEXT: popq %r13 1702; SSE2-NEXT: popq %r14 1703; SSE2-NEXT: popq %r15 1704; SSE2-NEXT: popq %rbp 1705; SSE2-NEXT: retq 1706; 1707; SSSE3-LABEL: load_sext_16i1_to_16i8: 1708; SSSE3: # BB#0: # %entry 1709; SSSE3-NEXT: pushq %rbp 1710; SSSE3-NEXT: pushq %r15 1711; SSSE3-NEXT: pushq %r14 1712; SSSE3-NEXT: pushq %r13 1713; SSSE3-NEXT: pushq %r12 1714; SSSE3-NEXT: pushq %rbx 1715; SSSE3-NEXT: movswq (%rdi), %rax 1716; SSSE3-NEXT: movq %rax, %r8 1717; SSSE3-NEXT: movq %rax, %r9 1718; SSSE3-NEXT: movq %rax, %r10 1719; SSSE3-NEXT: movq %rax, %r11 1720; SSSE3-NEXT: movq %rax, %r14 1721; SSSE3-NEXT: movq %rax, %r15 1722; SSSE3-NEXT: movq %rax, %r12 1723; SSSE3-NEXT: movq %rax, %r13 1724; SSSE3-NEXT: movq %rax, %rbx 1725; SSSE3-NEXT: movq %rax, %rcx 1726; SSSE3-NEXT: movq %rax, %rdx 1727; SSSE3-NEXT: movq %rax, %rsi 1728; SSSE3-NEXT: movq %rax, %rdi 1729; SSSE3-NEXT: movq %rax, %rbp 1730; SSSE3-NEXT: shlq $49, %rbp 1731; SSSE3-NEXT: sarq $63, %rbp 1732; SSSE3-NEXT: movd %ebp, %xmm0 1733; SSSE3-NEXT: movq %rax, %rbp 1734; 
SSSE3-NEXT: movsbq %al, %rax 1735; SSSE3-NEXT: shlq $57, %r8 1736; SSSE3-NEXT: sarq $63, %r8 1737; SSSE3-NEXT: movd %r8d, %xmm1 1738; SSSE3-NEXT: shlq $53, %r9 1739; SSSE3-NEXT: sarq $63, %r9 1740; SSSE3-NEXT: movd %r9d, %xmm2 1741; SSSE3-NEXT: shlq $61, %r10 1742; SSSE3-NEXT: sarq $63, %r10 1743; SSSE3-NEXT: movd %r10d, %xmm3 1744; SSSE3-NEXT: shlq $51, %r11 1745; SSSE3-NEXT: sarq $63, %r11 1746; SSSE3-NEXT: movd %r11d, %xmm4 1747; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1748; SSSE3-NEXT: shlq $59, %r14 1749; SSSE3-NEXT: sarq $63, %r14 1750; SSSE3-NEXT: movd %r14d, %xmm5 1751; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1752; SSSE3-NEXT: shlq $55, %r15 1753; SSSE3-NEXT: sarq $63, %r15 1754; SSSE3-NEXT: movd %r15d, %xmm2 1755; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 1756; SSSE3-NEXT: shlq $63, %r12 1757; SSSE3-NEXT: sarq $63, %r12 1758; SSSE3-NEXT: movd %r12d, %xmm0 1759; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 1760; SSSE3-NEXT: shlq $50, %r13 1761; SSSE3-NEXT: sarq $63, %r13 1762; SSSE3-NEXT: movd %r13d, %xmm1 1763; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1764; SSSE3-NEXT: shlq $58, %rbx 1765; SSSE3-NEXT: sarq $63, %rbx 1766; SSSE3-NEXT: movd %ebx, %xmm2 1767; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 1768; SSSE3-NEXT: shlq $54, %rcx 1769; SSSE3-NEXT: sarq $63, %rcx 1770; SSSE3-NEXT: movd %ecx, %xmm4 1771; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 1772; SSSE3-NEXT: shlq $62, %rdx 1773; SSSE3-NEXT: sarq $63, %rdx 1774; SSSE3-NEXT: movd %edx, %xmm3 1775; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 1776; SSSE3-NEXT: shlq $52, %rsi 1777; SSSE3-NEXT: sarq $63, %rsi 1778; SSSE3-NEXT: movd %esi, %xmm1 1779; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1780; SSSE3-NEXT: shlq $60, %rdi 1781; SSSE3-NEXT: sarq $63, %rdi 1782; SSSE3-NEXT: movd %edi, %xmm4 1783; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 1784; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 1785; SSSE3-NEXT: shrq $15, %rbp 1786; SSSE3-NEXT: movd %ebp, %xmm1 1787; SSSE3-NEXT: shrq $7, %rax 1788; SSSE3-NEXT: movd %eax, %xmm2 1789; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 1790; 
SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 1791; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1792; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 1793; SSSE3-NEXT: popq %rbx 1794; SSSE3-NEXT: popq %r12 1795; SSSE3-NEXT: popq %r13 1796; SSSE3-NEXT: popq %r14 1797; SSSE3-NEXT: popq %r15 1798; SSSE3-NEXT: popq %rbp 1799; SSSE3-NEXT: retq 1800; 1801; SSE41-LABEL: load_sext_16i1_to_16i8: 1802; SSE41: # BB#0: # %entry 1803; SSE41-NEXT: movswq (%rdi), %rax 1804; SSE41-NEXT: movq %rax, %rcx 1805; SSE41-NEXT: shlq $62, %rcx 1806; SSE41-NEXT: sarq $63, %rcx 1807; SSE41-NEXT: movq %rax, %rdx 1808; SSE41-NEXT: shlq $63, %rdx 1809; SSE41-NEXT: sarq $63, %rdx 1810; SSE41-NEXT: movd %edx, %xmm0 1811; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 1812; SSE41-NEXT: movq %rax, %rcx 1813; SSE41-NEXT: shlq $61, %rcx 1814; SSE41-NEXT: sarq $63, %rcx 1815; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 1816; SSE41-NEXT: movq %rax, %rcx 1817; SSE41-NEXT: shlq $60, %rcx 1818; SSE41-NEXT: sarq $63, %rcx 1819; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 1820; SSE41-NEXT: movq %rax, %rcx 1821; SSE41-NEXT: shlq $59, %rcx 1822; SSE41-NEXT: sarq $63, %rcx 1823; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 1824; SSE41-NEXT: movq %rax, %rcx 1825; SSE41-NEXT: shlq $58, %rcx 1826; SSE41-NEXT: sarq $63, %rcx 1827; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 1828; SSE41-NEXT: movq %rax, %rcx 1829; SSE41-NEXT: shlq $57, %rcx 1830; SSE41-NEXT: sarq $63, %rcx 1831; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 1832; SSE41-NEXT: movsbq %al, %rcx 1833; SSE41-NEXT: shrq $7, %rcx 1834; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 1835; SSE41-NEXT: movq %rax, %rcx 1836; SSE41-NEXT: shlq $55, %rcx 1837; SSE41-NEXT: sarq $63, %rcx 1838; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 1839; SSE41-NEXT: movq %rax, %rcx 1840; SSE41-NEXT: shlq $54, %rcx 1841; SSE41-NEXT: sarq $63, %rcx 1842; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 1843; SSE41-NEXT: movq %rax, %rcx 1844; SSE41-NEXT: shlq $53, %rcx 1845; SSE41-NEXT: sarq $63, %rcx 1846; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 1847; SSE41-NEXT: movq %rax, %rcx 1848; SSE41-NEXT: shlq $52, %rcx 1849; SSE41-NEXT: sarq $63, %rcx 1850; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 1851; SSE41-NEXT: movq %rax, %rcx 1852; SSE41-NEXT: shlq $51, %rcx 1853; SSE41-NEXT: sarq $63, %rcx 1854; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 1855; SSE41-NEXT: movq %rax, %rcx 1856; SSE41-NEXT: shlq $50, %rcx 1857; SSE41-NEXT: sarq $63, %rcx 1858; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 1859; SSE41-NEXT: movq %rax, %rcx 1860; SSE41-NEXT: shlq $49, %rcx 1861; SSE41-NEXT: sarq $63, %rcx 1862; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 1863; SSE41-NEXT: shrq $15, %rax 1864; SSE41-NEXT: pinsrb $15, %eax, %xmm0 1865; SSE41-NEXT: retq 1866; 1867; AVX-LABEL: load_sext_16i1_to_16i8: 1868; AVX: # BB#0: # %entry 1869; AVX-NEXT: movswq (%rdi), %rax 1870; AVX-NEXT: movq %rax, %rcx 1871; AVX-NEXT: shlq $62, %rcx 1872; AVX-NEXT: sarq $63, %rcx 1873; AVX-NEXT: movq %rax, %rdx 1874; AVX-NEXT: shlq $63, %rdx 1875; AVX-NEXT: sarq $63, %rdx 1876; AVX-NEXT: vmovd %edx, %xmm0 1877; AVX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 1878; AVX-NEXT: movq %rax, %rcx 1879; AVX-NEXT: shlq $61, %rcx 1880; AVX-NEXT: sarq $63, %rcx 1881; AVX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 1882; AVX-NEXT: movq %rax, %rcx 
; AVX-NEXT: shlq $60, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $59, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $58, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $57, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX-NEXT: movsbq %al, %rcx
; AVX-NEXT: shrq $7, %rcx
; AVX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $55, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $54, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $53, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $52, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $51, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $50, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shlq $49, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX-NEXT: shrq $15, %rax
; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: movswl (%eax), %eax
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $30, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: movl %eax, %edx
; X32-SSE41-NEXT: shll $31, %edx
; X32-SSE41-NEXT: sarl $31, %edx
; X32-SSE41-NEXT: movd %edx, %xmm0
; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $29, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $28, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $27, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $26, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $25, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm0
; X32-SSE41-NEXT: movsbl %al, %ecx
; X32-SSE41-NEXT: shrl $7, %ecx
; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $23, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $22, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $21, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $20, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $19, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $18, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $17, %ecx
; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm0
; X32-SSE41-NEXT: shrl $15, %eax
; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %X = load <16 x i1>, <16 x i1>* %ptr
  %Y = sext <16 x i1> %X to <16 x i8>
  ret <16 x i8> %Y
}

define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-LABEL: load_sext_16i1_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $14, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $6, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $10, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $2, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $12, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $4, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $8, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $13, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $5, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
2055; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2056; SSE2-NEXT: movl %eax, %ecx 2057; SSE2-NEXT: shrl $9, %ecx 2058; SSE2-NEXT: andl $1, %ecx 2059; SSE2-NEXT: movd %ecx, %xmm3 2060; SSE2-NEXT: movl %eax, %ecx 2061; SSE2-NEXT: shrl %ecx 2062; SSE2-NEXT: andl $1, %ecx 2063; SSE2-NEXT: movd %ecx, %xmm0 2064; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2065; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2066; SSE2-NEXT: movl %eax, %ecx 2067; SSE2-NEXT: shrl $11, %ecx 2068; SSE2-NEXT: andl $1, %ecx 2069; SSE2-NEXT: movd %ecx, %xmm2 2070; SSE2-NEXT: movl %eax, %ecx 2071; SSE2-NEXT: shrl $3, %ecx 2072; SSE2-NEXT: andl $1, %ecx 2073; SSE2-NEXT: movd %ecx, %xmm3 2074; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2075; SSE2-NEXT: movl %eax, %ecx 2076; SSE2-NEXT: shrl $7, %ecx 2077; SSE2-NEXT: andl $1, %ecx 2078; SSE2-NEXT: movd %ecx, %xmm2 2079; SSE2-NEXT: shrl $15, %eax 2080; SSE2-NEXT: movzwl %ax, %eax 2081; SSE2-NEXT: movd %eax, %xmm4 2082; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 2083; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2084; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2085; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2086; SSE2-NEXT: movdqa %xmm1, %xmm0 2087; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2088; SSE2-NEXT: psllw $15, %xmm0 2089; SSE2-NEXT: psraw $15, %xmm0 2090; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 2091; SSE2-NEXT: psllw $15, %xmm1 2092; SSE2-NEXT: psraw $15, %xmm1 2093; SSE2-NEXT: retq 2094; 2095; SSSE3-LABEL: load_sext_16i1_to_16i16: 2096; SSSE3: # BB#0: # %entry 2097; SSSE3-NEXT: movzwl (%rdi), %eax 2098; SSSE3-NEXT: movl %eax, %ecx 2099; SSSE3-NEXT: shrl $14, %ecx 2100; SSSE3-NEXT: andl $1, %ecx 2101; SSSE3-NEXT: movd %ecx, %xmm0 2102; SSSE3-NEXT: movl %eax, %ecx 2103; SSSE3-NEXT: shrl $6, %ecx 2104; SSSE3-NEXT: andl $1, %ecx 2105; SSSE3-NEXT: movd %ecx, %xmm1 2106; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2107; SSSE3-NEXT: movl %eax, %ecx 2108; SSSE3-NEXT: shrl $10, %ecx 2109; SSSE3-NEXT: andl $1, %ecx 2110; SSSE3-NEXT: movd %ecx, %xmm0 2111; SSSE3-NEXT: movl %eax, %ecx 2112; SSSE3-NEXT: shrl $2, %ecx 2113; SSSE3-NEXT: andl $1, %ecx 2114; SSSE3-NEXT: movd %ecx, %xmm2 2115; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2116; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2117; SSSE3-NEXT: movl %eax, %ecx 2118; SSSE3-NEXT: shrl $12, %ecx 2119; SSSE3-NEXT: andl $1, %ecx 2120; SSSE3-NEXT: movd %ecx, %xmm0 2121; SSSE3-NEXT: movl %eax, %ecx 2122; SSSE3-NEXT: shrl $4, %ecx 2123; SSSE3-NEXT: andl $1, %ecx 2124; SSSE3-NEXT: movd %ecx, %xmm3 2125; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2126; SSSE3-NEXT: movl %eax, %ecx 2127; SSSE3-NEXT: andl $1, %ecx 2128; SSSE3-NEXT: movd %ecx, %xmm1 2129; SSSE3-NEXT: movl %eax, %ecx 2130; SSSE3-NEXT: shrl $8, %ecx 2131; SSSE3-NEXT: andl $1, %ecx 2132; SSSE3-NEXT: movd %ecx, %xmm0 2133; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2134; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2135; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2136; SSSE3-NEXT: movl %eax, %ecx 2137; SSSE3-NEXT: shrl $13, %ecx 2138; SSSE3-NEXT: andl $1, %ecx 2139; SSSE3-NEXT: movd %ecx, %xmm0 2140; SSSE3-NEXT: movl %eax, %ecx 2141; SSSE3-NEXT: shrl $5, %ecx 2142; SSSE3-NEXT: andl $1, %ecx 2143; SSSE3-NEXT: movd %ecx, %xmm2 2144; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2145; SSSE3-NEXT: movl %eax, %ecx 2146; SSSE3-NEXT: shrl $9, %ecx 2147; SSSE3-NEXT: andl $1, %ecx 2148; SSSE3-NEXT: movd %ecx, %xmm3 2149; SSSE3-NEXT: movl %eax, %ecx 2150; SSSE3-NEXT: shrl %ecx 2151; SSSE3-NEXT: andl $1, %ecx 2152; SSSE3-NEXT: movd %ecx, %xmm0 2153; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2154; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2155; SSSE3-NEXT: movl %eax, %ecx 2156; SSSE3-NEXT: shrl $11, %ecx 2157; SSSE3-NEXT: andl $1, %ecx 2158; SSSE3-NEXT: movd %ecx, %xmm2 2159; SSSE3-NEXT: movl %eax, %ecx 2160; SSSE3-NEXT: shrl $3, %ecx 2161; SSSE3-NEXT: andl $1, %ecx 2162; SSSE3-NEXT: movd %ecx, %xmm3 2163; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2164; SSSE3-NEXT: movl %eax, %ecx 2165; SSSE3-NEXT: shrl $7, %ecx 2166; SSSE3-NEXT: andl $1, %ecx 2167; SSSE3-NEXT: movd %ecx, %xmm2 2168; SSSE3-NEXT: shrl $15, %eax 2169; SSSE3-NEXT: movzwl %ax, %eax 2170; SSSE3-NEXT: movd %eax, %xmm4 2171; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 2172; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 
2173; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2174; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2175; SSSE3-NEXT: movdqa %xmm1, %xmm0 2176; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2177; SSSE3-NEXT: psllw $15, %xmm0 2178; SSSE3-NEXT: psraw $15, %xmm0 2179; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 2180; SSSE3-NEXT: psllw $15, %xmm1 2181; SSSE3-NEXT: psraw $15, %xmm1 2182; SSSE3-NEXT: retq 2183; 2184; SSE41-LABEL: load_sext_16i1_to_16i16: 2185; SSE41: # BB#0: # %entry 2186; SSE41-NEXT: movzwl (%rdi), %eax 2187; SSE41-NEXT: movl %eax, %ecx 2188; SSE41-NEXT: shrl %ecx 2189; SSE41-NEXT: andl $1, %ecx 2190; SSE41-NEXT: movl %eax, %edx 2191; SSE41-NEXT: andl $1, %edx 2192; SSE41-NEXT: movd %edx, %xmm1 2193; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 2194; SSE41-NEXT: movl %eax, %ecx 2195; SSE41-NEXT: shrl $2, %ecx 2196; SSE41-NEXT: andl $1, %ecx 2197; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 2198; SSE41-NEXT: movl %eax, %ecx 2199; SSE41-NEXT: shrl $3, %ecx 2200; SSE41-NEXT: andl $1, %ecx 2201; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 2202; SSE41-NEXT: movl %eax, %ecx 2203; SSE41-NEXT: shrl $4, %ecx 2204; SSE41-NEXT: andl $1, %ecx 2205; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 2206; SSE41-NEXT: movl %eax, %ecx 2207; SSE41-NEXT: shrl $5, %ecx 2208; SSE41-NEXT: andl $1, %ecx 2209; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 2210; SSE41-NEXT: movl %eax, %ecx 2211; SSE41-NEXT: shrl $6, %ecx 2212; SSE41-NEXT: andl $1, %ecx 2213; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 2214; SSE41-NEXT: movl %eax, %ecx 2215; SSE41-NEXT: shrl $7, %ecx 2216; SSE41-NEXT: andl $1, %ecx 2217; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 2218; SSE41-NEXT: movl %eax, %ecx 2219; SSE41-NEXT: shrl $8, %ecx 2220; SSE41-NEXT: andl $1, %ecx 2221; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 2222; SSE41-NEXT: movl %eax, %ecx 2223; SSE41-NEXT: shrl $9, %ecx 2224; SSE41-NEXT: andl $1, %ecx 2225; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 2226; SSE41-NEXT: movl %eax, %ecx 2227; SSE41-NEXT: shrl $10, %ecx 2228; SSE41-NEXT: andl $1, %ecx 2229; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 2230; SSE41-NEXT: movl %eax, %ecx 2231; SSE41-NEXT: shrl $11, %ecx 2232; SSE41-NEXT: andl $1, %ecx 2233; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 2234; SSE41-NEXT: movl %eax, %ecx 2235; SSE41-NEXT: shrl $12, %ecx 2236; SSE41-NEXT: andl $1, %ecx 2237; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 2238; SSE41-NEXT: movl %eax, %ecx 2239; SSE41-NEXT: shrl $13, %ecx 2240; SSE41-NEXT: andl $1, %ecx 2241; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 2242; SSE41-NEXT: movl %eax, %ecx 2243; SSE41-NEXT: shrl $14, %ecx 2244; SSE41-NEXT: andl $1, %ecx 2245; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 2246; SSE41-NEXT: shrl $15, %eax 2247; SSE41-NEXT: movzwl %ax, %eax 2248; SSE41-NEXT: pinsrb $15, %eax, %xmm1 2249; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2250; SSE41-NEXT: psllw $15, %xmm0 2251; SSE41-NEXT: psraw $15, %xmm0 2252; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2253; SSE41-NEXT: psllw $15, %xmm1 2254; SSE41-NEXT: psraw $15, %xmm1 2255; SSE41-NEXT: retq 2256; 2257; AVX1-LABEL: load_sext_16i1_to_16i16: 2258; 
AVX1: # BB#0: # %entry 2259; AVX1-NEXT: pushq %rbp 2260; AVX1-NEXT: .Ltmp0: 2261; AVX1-NEXT: .cfi_def_cfa_offset 16 2262; AVX1-NEXT: pushq %r15 2263; AVX1-NEXT: .Ltmp1: 2264; AVX1-NEXT: .cfi_def_cfa_offset 24 2265; AVX1-NEXT: pushq %r14 2266; AVX1-NEXT: .Ltmp2: 2267; AVX1-NEXT: .cfi_def_cfa_offset 32 2268; AVX1-NEXT: pushq %r13 2269; AVX1-NEXT: .Ltmp3: 2270; AVX1-NEXT: .cfi_def_cfa_offset 40 2271; AVX1-NEXT: pushq %r12 2272; AVX1-NEXT: .Ltmp4: 2273; AVX1-NEXT: .cfi_def_cfa_offset 48 2274; AVX1-NEXT: pushq %rbx 2275; AVX1-NEXT: .Ltmp5: 2276; AVX1-NEXT: .cfi_def_cfa_offset 56 2277; AVX1-NEXT: .Ltmp6: 2278; AVX1-NEXT: .cfi_offset %rbx, -56 2279; AVX1-NEXT: .Ltmp7: 2280; AVX1-NEXT: .cfi_offset %r12, -48 2281; AVX1-NEXT: .Ltmp8: 2282; AVX1-NEXT: .cfi_offset %r13, -40 2283; AVX1-NEXT: .Ltmp9: 2284; AVX1-NEXT: .cfi_offset %r14, -32 2285; AVX1-NEXT: .Ltmp10: 2286; AVX1-NEXT: .cfi_offset %r15, -24 2287; AVX1-NEXT: .Ltmp11: 2288; AVX1-NEXT: .cfi_offset %rbp, -16 2289; AVX1-NEXT: movswq (%rdi), %rax 2290; AVX1-NEXT: movq %rax, %rcx 2291; AVX1-NEXT: shlq $55, %rcx 2292; AVX1-NEXT: sarq $63, %rcx 2293; AVX1-NEXT: vmovd %ecx, %xmm0 2294; AVX1-NEXT: movq %rax, %r8 2295; AVX1-NEXT: movq %rax, %r10 2296; AVX1-NEXT: movq %rax, %r11 2297; AVX1-NEXT: movq %rax, %r14 2298; AVX1-NEXT: movq %rax, %r15 2299; AVX1-NEXT: movq %rax, %r9 2300; AVX1-NEXT: movq %rax, %r12 2301; AVX1-NEXT: movq %rax, %r13 2302; AVX1-NEXT: movq %rax, %rbx 2303; AVX1-NEXT: movq %rax, %rdi 2304; AVX1-NEXT: movq %rax, %rcx 2305; AVX1-NEXT: movq %rax, %rdx 2306; AVX1-NEXT: movq %rax, %rsi 2307; AVX1-NEXT: movsbq %al, %rbp 2308; AVX1-NEXT: shlq $54, %rax 2309; AVX1-NEXT: sarq $63, %rax 2310; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2311; AVX1-NEXT: shlq $53, %r8 2312; AVX1-NEXT: sarq $63, %r8 2313; AVX1-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 2314; AVX1-NEXT: shlq $52, %r10 2315; AVX1-NEXT: sarq $63, %r10 2316; AVX1-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 2317; AVX1-NEXT: shlq $51, %r11 2318; AVX1-NEXT: sarq $63, %r11 2319; AVX1-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 2320; AVX1-NEXT: shlq $50, %r14 2321; AVX1-NEXT: sarq $63, %r14 2322; AVX1-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 2323; AVX1-NEXT: shlq $49, %r15 2324; AVX1-NEXT: sarq $63, %r15 2325; AVX1-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 2326; AVX1-NEXT: shrq $15, %r9 2327; AVX1-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 2328; AVX1-NEXT: shlq $63, %r13 2329; AVX1-NEXT: sarq $63, %r13 2330; AVX1-NEXT: vmovd %r13d, %xmm1 2331; AVX1-NEXT: shlq $62, %r12 2332; AVX1-NEXT: sarq $63, %r12 2333; AVX1-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 2334; AVX1-NEXT: shlq $61, %rbx 2335; AVX1-NEXT: sarq $63, %rbx 2336; AVX1-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 2337; AVX1-NEXT: shlq $60, %rdi 2338; AVX1-NEXT: sarq $63, %rdi 2339; AVX1-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 2340; AVX1-NEXT: shlq $59, %rcx 2341; AVX1-NEXT: sarq $63, %rcx 2342; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 2343; AVX1-NEXT: shlq $58, %rdx 2344; AVX1-NEXT: sarq $63, %rdx 2345; AVX1-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 2346; AVX1-NEXT: shlq $57, %rsi 2347; AVX1-NEXT: sarq $63, %rsi 2348; AVX1-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 2349; AVX1-NEXT: shrq $7, %rbp 2350; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 2351; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2352; AVX1-NEXT: popq %rbx 2353; AVX1-NEXT: popq %r12 2354; AVX1-NEXT: popq %r13 2355; AVX1-NEXT: popq %r14 2356; AVX1-NEXT: popq %r15 2357; AVX1-NEXT: popq %rbp 2358; AVX1-NEXT: retq 2359; 2360; AVX2-LABEL: load_sext_16i1_to_16i16: 2361; AVX2: # BB#0: # %entry 2362; AVX2-NEXT: pushq %rbp 2363; 
AVX2-NEXT: .Ltmp0: 2364; AVX2-NEXT: .cfi_def_cfa_offset 16 2365; AVX2-NEXT: pushq %r15 2366; AVX2-NEXT: .Ltmp1: 2367; AVX2-NEXT: .cfi_def_cfa_offset 24 2368; AVX2-NEXT: pushq %r14 2369; AVX2-NEXT: .Ltmp2: 2370; AVX2-NEXT: .cfi_def_cfa_offset 32 2371; AVX2-NEXT: pushq %r13 2372; AVX2-NEXT: .Ltmp3: 2373; AVX2-NEXT: .cfi_def_cfa_offset 40 2374; AVX2-NEXT: pushq %r12 2375; AVX2-NEXT: .Ltmp4: 2376; AVX2-NEXT: .cfi_def_cfa_offset 48 2377; AVX2-NEXT: pushq %rbx 2378; AVX2-NEXT: .Ltmp5: 2379; AVX2-NEXT: .cfi_def_cfa_offset 56 2380; AVX2-NEXT: .Ltmp6: 2381; AVX2-NEXT: .cfi_offset %rbx, -56 2382; AVX2-NEXT: .Ltmp7: 2383; AVX2-NEXT: .cfi_offset %r12, -48 2384; AVX2-NEXT: .Ltmp8: 2385; AVX2-NEXT: .cfi_offset %r13, -40 2386; AVX2-NEXT: .Ltmp9: 2387; AVX2-NEXT: .cfi_offset %r14, -32 2388; AVX2-NEXT: .Ltmp10: 2389; AVX2-NEXT: .cfi_offset %r15, -24 2390; AVX2-NEXT: .Ltmp11: 2391; AVX2-NEXT: .cfi_offset %rbp, -16 2392; AVX2-NEXT: movswq (%rdi), %rax 2393; AVX2-NEXT: movq %rax, %rcx 2394; AVX2-NEXT: shlq $55, %rcx 2395; AVX2-NEXT: sarq $63, %rcx 2396; AVX2-NEXT: vmovd %ecx, %xmm0 2397; AVX2-NEXT: movq %rax, %r8 2398; AVX2-NEXT: movq %rax, %r10 2399; AVX2-NEXT: movq %rax, %r11 2400; AVX2-NEXT: movq %rax, %r14 2401; AVX2-NEXT: movq %rax, %r15 2402; AVX2-NEXT: movq %rax, %r9 2403; AVX2-NEXT: movq %rax, %r12 2404; AVX2-NEXT: movq %rax, %r13 2405; AVX2-NEXT: movq %rax, %rbx 2406; AVX2-NEXT: movq %rax, %rdi 2407; AVX2-NEXT: movq %rax, %rcx 2408; AVX2-NEXT: movq %rax, %rdx 2409; AVX2-NEXT: movq %rax, %rsi 2410; AVX2-NEXT: movsbq %al, %rbp 2411; AVX2-NEXT: shlq $54, %rax 2412; AVX2-NEXT: sarq $63, %rax 2413; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2414; AVX2-NEXT: shlq $53, %r8 2415; AVX2-NEXT: sarq $63, %r8 2416; AVX2-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 2417; AVX2-NEXT: shlq $52, %r10 2418; AVX2-NEXT: sarq $63, %r10 2419; AVX2-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 2420; AVX2-NEXT: shlq $51, %r11 2421; AVX2-NEXT: sarq $63, %r11 2422; AVX2-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 2423; AVX2-NEXT: shlq $50, %r14 2424; AVX2-NEXT: sarq $63, %r14 2425; AVX2-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 2426; AVX2-NEXT: shlq $49, %r15 2427; AVX2-NEXT: sarq $63, %r15 2428; AVX2-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 2429; AVX2-NEXT: shrq $15, %r9 2430; AVX2-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 2431; AVX2-NEXT: shlq $63, %r13 2432; AVX2-NEXT: sarq $63, %r13 2433; AVX2-NEXT: vmovd %r13d, %xmm1 2434; AVX2-NEXT: shlq $62, %r12 2435; AVX2-NEXT: sarq $63, %r12 2436; AVX2-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 2437; AVX2-NEXT: shlq $61, %rbx 2438; AVX2-NEXT: sarq $63, %rbx 2439; AVX2-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 2440; AVX2-NEXT: shlq $60, %rdi 2441; AVX2-NEXT: sarq $63, %rdi 2442; AVX2-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 2443; AVX2-NEXT: shlq $59, %rcx 2444; AVX2-NEXT: sarq $63, %rcx 2445; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 2446; AVX2-NEXT: shlq $58, %rdx 2447; AVX2-NEXT: sarq $63, %rdx 2448; AVX2-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 2449; AVX2-NEXT: shlq $57, %rsi 2450; AVX2-NEXT: sarq $63, %rsi 2451; AVX2-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 2452; AVX2-NEXT: shrq $7, %rbp 2453; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 2454; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 2455; AVX2-NEXT: popq %rbx 2456; AVX2-NEXT: popq %r12 2457; AVX2-NEXT: popq %r13 2458; AVX2-NEXT: popq %r14 2459; AVX2-NEXT: popq %r15 2460; AVX2-NEXT: popq %rbp 2461; AVX2-NEXT: retq 2462; 2463; X32-SSE41-LABEL: load_sext_16i1_to_16i16: 2464; X32-SSE41: # BB#0: # %entry 2465; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2466; X32-SSE41-NEXT: movzwl 
(%eax), %eax 2467; X32-SSE41-NEXT: movl %eax, %ecx 2468; X32-SSE41-NEXT: shrl %ecx 2469; X32-SSE41-NEXT: andl $1, %ecx 2470; X32-SSE41-NEXT: movl %eax, %edx 2471; X32-SSE41-NEXT: andl $1, %edx 2472; X32-SSE41-NEXT: movd %edx, %xmm1 2473; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 2474; X32-SSE41-NEXT: movl %eax, %ecx 2475; X32-SSE41-NEXT: shrl $2, %ecx 2476; X32-SSE41-NEXT: andl $1, %ecx 2477; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 2478; X32-SSE41-NEXT: movl %eax, %ecx 2479; X32-SSE41-NEXT: shrl $3, %ecx 2480; X32-SSE41-NEXT: andl $1, %ecx 2481; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 2482; X32-SSE41-NEXT: movl %eax, %ecx 2483; X32-SSE41-NEXT: shrl $4, %ecx 2484; X32-SSE41-NEXT: andl $1, %ecx 2485; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 2486; X32-SSE41-NEXT: movl %eax, %ecx 2487; X32-SSE41-NEXT: shrl $5, %ecx 2488; X32-SSE41-NEXT: andl $1, %ecx 2489; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 2490; X32-SSE41-NEXT: movl %eax, %ecx 2491; X32-SSE41-NEXT: shrl $6, %ecx 2492; X32-SSE41-NEXT: andl $1, %ecx 2493; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 2494; X32-SSE41-NEXT: movl %eax, %ecx 2495; X32-SSE41-NEXT: shrl $7, %ecx 2496; X32-SSE41-NEXT: andl $1, %ecx 2497; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 2498; X32-SSE41-NEXT: movl %eax, %ecx 2499; X32-SSE41-NEXT: shrl $8, %ecx 2500; X32-SSE41-NEXT: andl $1, %ecx 2501; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 2502; X32-SSE41-NEXT: movl %eax, %ecx 2503; X32-SSE41-NEXT: shrl $9, %ecx 2504; X32-SSE41-NEXT: andl $1, %ecx 2505; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 2506; X32-SSE41-NEXT: movl %eax, %ecx 2507; X32-SSE41-NEXT: shrl $10, %ecx 2508; X32-SSE41-NEXT: andl $1, %ecx 2509; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 2510; X32-SSE41-NEXT: movl %eax, %ecx 2511; X32-SSE41-NEXT: shrl $11, %ecx 2512; X32-SSE41-NEXT: andl $1, %ecx 2513; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 2514; X32-SSE41-NEXT: movl %eax, %ecx 2515; X32-SSE41-NEXT: shrl $12, %ecx 2516; X32-SSE41-NEXT: andl $1, %ecx 2517; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 2518; X32-SSE41-NEXT: movl %eax, %ecx 2519; X32-SSE41-NEXT: shrl $13, %ecx 2520; X32-SSE41-NEXT: andl $1, %ecx 2521; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 2522; X32-SSE41-NEXT: movl %eax, %ecx 2523; X32-SSE41-NEXT: shrl $14, %ecx 2524; X32-SSE41-NEXT: andl $1, %ecx 2525; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 2526; X32-SSE41-NEXT: shrl $15, %eax 2527; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1 2528; X32-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2529; X32-SSE41-NEXT: psllw $15, %xmm0 2530; X32-SSE41-NEXT: psraw $15, %xmm0 2531; X32-SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2532; X32-SSE41-NEXT: psllw $15, %xmm1 2533; X32-SSE41-NEXT: psraw $15, %xmm1 2534; X32-SSE41-NEXT: retl 2535entry: 2536 %X = load <16 x i1>, <16 x i1>* %ptr 2537 %Y = sext <16 x i1> %X to <16 x i16> 2538 ret <16 x i16> %Y 2539} 2540 2541define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { 2542; SSE2-LABEL: load_sext_32i1_to_32i8: 2543; SSE2: # BB#0: # %entry 2544; SSE2-NEXT: pushq %rbp 2545; SSE2-NEXT: pushq %r15 2546; SSE2-NEXT: pushq %r14 2547; SSE2-NEXT: pushq %r13 2548; SSE2-NEXT: pushq %r12 2549; SSE2-NEXT: pushq %rbx 2550; SSE2-NEXT: movswq (%rdi), %rbx 2551; SSE2-NEXT: movq %rbx, %r10 2552; SSE2-NEXT: movq %rbx, %r8 2553; SSE2-NEXT: movq %rbx, %r9 2554; SSE2-NEXT: movq %rbx, %r11 2555; SSE2-NEXT: movq %rbx, %r14 2556; SSE2-NEXT: movq %rbx, %r15 2557; SSE2-NEXT: movq %rbx, %r12 2558; SSE2-NEXT: movq 
%rbx, %r13 2559; SSE2-NEXT: movq %rbx, %rdx 2560; SSE2-NEXT: movq %rbx, %rsi 2561; SSE2-NEXT: movq %rbx, %rcx 2562; SSE2-NEXT: movq %rbx, %rbp 2563; SSE2-NEXT: movq %rbx, %rax 2564; SSE2-NEXT: shlq $49, %rax 2565; SSE2-NEXT: sarq $63, %rax 2566; SSE2-NEXT: movd %eax, %xmm0 2567; SSE2-NEXT: movq %rbx, %rax 2568; SSE2-NEXT: shlq $57, %r10 2569; SSE2-NEXT: sarq $63, %r10 2570; SSE2-NEXT: movd %r10d, %xmm15 2571; SSE2-NEXT: movq %rbx, %r10 2572; SSE2-NEXT: movsbq %bl, %rbx 2573; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 2574; SSE2-NEXT: shlq $53, %r8 2575; SSE2-NEXT: sarq $63, %r8 2576; SSE2-NEXT: movd %r8d, %xmm8 2577; SSE2-NEXT: shlq $61, %r9 2578; SSE2-NEXT: sarq $63, %r9 2579; SSE2-NEXT: movd %r9d, %xmm2 2580; SSE2-NEXT: shlq $51, %r11 2581; SSE2-NEXT: sarq $63, %r11 2582; SSE2-NEXT: movd %r11d, %xmm9 2583; SSE2-NEXT: shlq $59, %r14 2584; SSE2-NEXT: sarq $63, %r14 2585; SSE2-NEXT: movd %r14d, %xmm5 2586; SSE2-NEXT: shlq $55, %r15 2587; SSE2-NEXT: sarq $63, %r15 2588; SSE2-NEXT: movd %r15d, %xmm10 2589; SSE2-NEXT: shlq $63, %r12 2590; SSE2-NEXT: sarq $63, %r12 2591; SSE2-NEXT: movd %r12d, %xmm0 2592; SSE2-NEXT: shlq $50, %r13 2593; SSE2-NEXT: sarq $63, %r13 2594; SSE2-NEXT: movd %r13d, %xmm11 2595; SSE2-NEXT: shlq $58, %rdx 2596; SSE2-NEXT: sarq $63, %rdx 2597; SSE2-NEXT: movd %edx, %xmm4 2598; SSE2-NEXT: shlq $54, %rsi 2599; SSE2-NEXT: sarq $63, %rsi 2600; SSE2-NEXT: movd %esi, %xmm12 2601; SSE2-NEXT: shlq $62, %rcx 2602; SSE2-NEXT: sarq $63, %rcx 2603; SSE2-NEXT: movd %ecx, %xmm6 2604; SSE2-NEXT: shlq $52, %rbp 2605; SSE2-NEXT: sarq $63, %rbp 2606; SSE2-NEXT: movd %ebp, %xmm13 2607; SSE2-NEXT: shlq $60, %rax 2608; SSE2-NEXT: sarq $63, %rax 2609; SSE2-NEXT: movd %eax, %xmm7 2610; SSE2-NEXT: shrq $15, %r10 2611; SSE2-NEXT: movd %r10d, %xmm14 2612; SSE2-NEXT: shrq $7, %rbx 2613; SSE2-NEXT: movd %ebx, %xmm3 2614; SSE2-NEXT: movswq 2(%rdi), %rdx 2615; SSE2-NEXT: movq %rdx, %r8 2616; SSE2-NEXT: movq %rdx, %r9 2617; SSE2-NEXT: movq %rdx, %r10 2618; SSE2-NEXT: movq %rdx, %r11 2619; SSE2-NEXT: movq %rdx, %r14 2620; SSE2-NEXT: movq %rdx, %r15 2621; SSE2-NEXT: movq %rdx, %r12 2622; SSE2-NEXT: movq %rdx, %r13 2623; SSE2-NEXT: movq %rdx, %rbx 2624; SSE2-NEXT: movq %rdx, %rax 2625; SSE2-NEXT: movq %rdx, %rcx 2626; SSE2-NEXT: movq %rdx, %rsi 2627; SSE2-NEXT: movq %rdx, %rdi 2628; SSE2-NEXT: movq %rdx, %rbp 2629; SSE2-NEXT: shlq $49, %rbp 2630; SSE2-NEXT: sarq $63, %rbp 2631; SSE2-NEXT: movd %ebp, %xmm1 2632; SSE2-NEXT: movq %rdx, %rbp 2633; SSE2-NEXT: movsbq %dl, %rdx 2634; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] 2635; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] 2636; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] 2637; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] 2638; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2639; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2640; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] 2641; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] 2642; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] 2643; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 2644; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] 2645; SSE2-NEXT: shlq $57, %r8 2646; SSE2-NEXT: sarq $63, %r8 2647; SSE2-NEXT: movd %r8d, %xmm2 2648; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 2649; SSE2-NEXT: shlq $53, %r9 2650; SSE2-NEXT: sarq $63, %r9 2651; SSE2-NEXT: movd %r9d, %xmm3 2652; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] 2653; SSE2-NEXT: shlq $61, %r10 2654; SSE2-NEXT: sarq $63, %r10 2655; SSE2-NEXT: movd %r10d, %xmm4 2656; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] 2657; SSE2-NEXT: shlq $51, %r11 2658; SSE2-NEXT: sarq $63, %r11 2659; SSE2-NEXT: movd %r11d, %xmm5 2660; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2661; SSE2-NEXT: shlq $59, %r14 2662; SSE2-NEXT: sarq $63, %r14 2663; SSE2-NEXT: movd %r14d, %xmm6 2664; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2665; SSE2-NEXT: shlq $55, %r15 2666; SSE2-NEXT: sarq $63, %r15 2667; SSE2-NEXT: movd %r15d, %xmm3 2668; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 2669; SSE2-NEXT: shlq $63, %r12 2670; SSE2-NEXT: sarq $63, %r12 2671; SSE2-NEXT: movd %r12d, %xmm1 2672; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 2673; SSE2-NEXT: shlq $50, %r13 2674; SSE2-NEXT: sarq $63, %r13 2675; SSE2-NEXT: movd %r13d, %xmm2 2676; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2677; SSE2-NEXT: shlq $58, %rbx 2678; SSE2-NEXT: sarq $63, %rbx 2679; SSE2-NEXT: movd %ebx, %xmm3 2680; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 2681; SSE2-NEXT: shlq $54, %rax 2682; SSE2-NEXT: sarq $63, %rax 2683; SSE2-NEXT: movd %eax, %xmm5 2684; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 2685; SSE2-NEXT: shlq $62, %rcx 2686; SSE2-NEXT: sarq $63, %rcx 2687; SSE2-NEXT: movd %ecx, %xmm4 2688; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2689; SSE2-NEXT: shlq $52, %rsi 2690; SSE2-NEXT: sarq $63, %rsi 2691; SSE2-NEXT: movd %esi, %xmm2 2692; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 2693; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2694; SSE2-NEXT: shlq $60, %rdi 2695; SSE2-NEXT: sarq $63, %rdi 2696; SSE2-NEXT: movd %edi, %xmm3 2697; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2698; SSE2-NEXT: shrq $15, %rbp 2699; SSE2-NEXT: movd %ebp, %xmm2 2700; SSE2-NEXT: shrq $7, %rdx 2701; SSE2-NEXT: movd %edx, %xmm5 2702; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 2703; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 2704; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2705; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 2706; SSE2-NEXT: popq %rbx 2707; SSE2-NEXT: popq %r12 2708; SSE2-NEXT: popq %r13 2709; SSE2-NEXT: popq %r14 2710; SSE2-NEXT: popq %r15 2711; SSE2-NEXT: popq %rbp 2712; SSE2-NEXT: retq 2713; 2714; SSSE3-LABEL: load_sext_32i1_to_32i8: 2715; SSSE3: # BB#0: # %entry 2716; SSSE3-NEXT: pushq %rbp 2717; SSSE3-NEXT: pushq %r15 2718; SSSE3-NEXT: pushq %r14 2719; SSSE3-NEXT: pushq %r13 2720; SSSE3-NEXT: pushq %r12 2721; SSSE3-NEXT: pushq %rbx 2722; SSSE3-NEXT: movswq (%rdi), %rbx 2723; SSSE3-NEXT: movq %rbx, %r10 2724; SSSE3-NEXT: movq %rbx, %r8 2725; SSSE3-NEXT: movq %rbx, %r9 2726; SSSE3-NEXT: movq %rbx, %r11 2727; SSSE3-NEXT: movq %rbx, %r14 2728; SSSE3-NEXT: movq %rbx, %r15 2729; SSSE3-NEXT: movq %rbx, %r12 2730; SSSE3-NEXT: movq %rbx, %r13 2731; SSSE3-NEXT: movq %rbx, %rdx 2732; SSSE3-NEXT: movq %rbx, %rsi 2733; SSSE3-NEXT: movq %rbx, %rcx 2734; SSSE3-NEXT: movq %rbx, %rbp 2735; SSSE3-NEXT: movq %rbx, %rax 2736; SSSE3-NEXT: shlq $49, %rax 2737; SSSE3-NEXT: sarq $63, %rax 2738; SSSE3-NEXT: movd %eax, %xmm0 2739; SSSE3-NEXT: movq %rbx, %rax 2740; SSSE3-NEXT: shlq $57, %r10 2741; SSSE3-NEXT: sarq $63, %r10 2742; SSSE3-NEXT: movd %r10d, %xmm15 2743; SSSE3-NEXT: movq %rbx, %r10 2744; SSSE3-NEXT: movsbq %bl, %rbx 2745; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 2746; SSSE3-NEXT: shlq $53, %r8 2747; SSSE3-NEXT: sarq $63, %r8 2748; SSSE3-NEXT: movd %r8d, %xmm8 2749; SSSE3-NEXT: shlq $61, %r9 2750; SSSE3-NEXT: sarq $63, %r9 2751; SSSE3-NEXT: movd %r9d, %xmm2 2752; 
SSSE3-NEXT: shlq $51, %r11 2753; SSSE3-NEXT: sarq $63, %r11 2754; SSSE3-NEXT: movd %r11d, %xmm9 2755; SSSE3-NEXT: shlq $59, %r14 2756; SSSE3-NEXT: sarq $63, %r14 2757; SSSE3-NEXT: movd %r14d, %xmm5 2758; SSSE3-NEXT: shlq $55, %r15 2759; SSSE3-NEXT: sarq $63, %r15 2760; SSSE3-NEXT: movd %r15d, %xmm10 2761; SSSE3-NEXT: shlq $63, %r12 2762; SSSE3-NEXT: sarq $63, %r12 2763; SSSE3-NEXT: movd %r12d, %xmm0 2764; SSSE3-NEXT: shlq $50, %r13 2765; SSSE3-NEXT: sarq $63, %r13 2766; SSSE3-NEXT: movd %r13d, %xmm11 2767; SSSE3-NEXT: shlq $58, %rdx 2768; SSSE3-NEXT: sarq $63, %rdx 2769; SSSE3-NEXT: movd %edx, %xmm4 2770; SSSE3-NEXT: shlq $54, %rsi 2771; SSSE3-NEXT: sarq $63, %rsi 2772; SSSE3-NEXT: movd %esi, %xmm12 2773; SSSE3-NEXT: shlq $62, %rcx 2774; SSSE3-NEXT: sarq $63, %rcx 2775; SSSE3-NEXT: movd %ecx, %xmm6 2776; SSSE3-NEXT: shlq $52, %rbp 2777; SSSE3-NEXT: sarq $63, %rbp 2778; SSSE3-NEXT: movd %ebp, %xmm13 2779; SSSE3-NEXT: shlq $60, %rax 2780; SSSE3-NEXT: sarq $63, %rax 2781; SSSE3-NEXT: movd %eax, %xmm7 2782; SSSE3-NEXT: shrq $15, %r10 2783; SSSE3-NEXT: movd %r10d, %xmm14 2784; SSSE3-NEXT: shrq $7, %rbx 2785; SSSE3-NEXT: movd %ebx, %xmm3 2786; SSSE3-NEXT: movswq 2(%rdi), %rdx 2787; SSSE3-NEXT: movq %rdx, %r8 2788; SSSE3-NEXT: movq %rdx, %r9 2789; SSSE3-NEXT: movq %rdx, %r10 2790; SSSE3-NEXT: movq %rdx, %r11 2791; SSSE3-NEXT: movq %rdx, %r14 2792; SSSE3-NEXT: movq %rdx, %r15 2793; SSSE3-NEXT: movq %rdx, %r12 2794; SSSE3-NEXT: movq %rdx, %r13 2795; SSSE3-NEXT: movq %rdx, %rbx 2796; SSSE3-NEXT: movq %rdx, %rax 2797; SSSE3-NEXT: movq %rdx, %rcx 2798; SSSE3-NEXT: movq %rdx, %rsi 2799; SSSE3-NEXT: movq %rdx, %rdi 2800; SSSE3-NEXT: movq %rdx, %rbp 2801; SSSE3-NEXT: shlq $49, %rbp 2802; SSSE3-NEXT: sarq $63, %rbp 2803; SSSE3-NEXT: movd %ebp, %xmm1 2804; SSSE3-NEXT: movq %rdx, %rbp 2805; SSSE3-NEXT: movsbq %dl, %rdx 2806; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] 2807; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] 2808; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] 2809; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] 2810; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2811; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2812; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] 2813; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] 2814; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] 2815; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = 
xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 2816; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] 2817; SSSE3-NEXT: shlq $57, %r8 2818; SSSE3-NEXT: sarq $63, %r8 2819; SSSE3-NEXT: movd %r8d, %xmm2 2820; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 2821; SSSE3-NEXT: shlq $53, %r9 2822; SSSE3-NEXT: sarq $63, %r9 2823; SSSE3-NEXT: movd %r9d, %xmm3 2824; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] 2825; SSSE3-NEXT: shlq $61, %r10 2826; SSSE3-NEXT: sarq $63, %r10 2827; SSSE3-NEXT: movd %r10d, %xmm4 2828; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] 2829; SSSE3-NEXT: shlq $51, %r11 2830; SSSE3-NEXT: sarq $63, %r11 2831; SSSE3-NEXT: movd %r11d, %xmm5 2832; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2833; SSSE3-NEXT: shlq $59, %r14 2834; SSSE3-NEXT: sarq $63, %r14 2835; SSSE3-NEXT: movd %r14d, %xmm6 2836; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2837; SSSE3-NEXT: shlq $55, %r15 2838; SSSE3-NEXT: sarq $63, %r15 2839; SSSE3-NEXT: movd %r15d, %xmm3 2840; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 2841; SSSE3-NEXT: shlq $63, %r12 2842; SSSE3-NEXT: sarq $63, %r12 2843; SSSE3-NEXT: movd %r12d, %xmm1 2844; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 2845; SSSE3-NEXT: shlq $50, %r13 2846; SSSE3-NEXT: sarq $63, %r13 2847; SSSE3-NEXT: movd %r13d, %xmm2 2848; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2849; SSSE3-NEXT: shlq $58, %rbx 2850; SSSE3-NEXT: sarq $63, %rbx 2851; SSSE3-NEXT: movd %ebx, %xmm3 2852; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 2853; SSSE3-NEXT: shlq $54, %rax 2854; SSSE3-NEXT: sarq $63, %rax 2855; SSSE3-NEXT: movd %eax, %xmm5 2856; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 2857; SSSE3-NEXT: shlq $62, %rcx 2858; SSSE3-NEXT: sarq $63, %rcx 2859; SSSE3-NEXT: movd %ecx, %xmm4 2860; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2861; SSSE3-NEXT: shlq $52, %rsi 2862; SSSE3-NEXT: sarq $63, %rsi 2863; SSSE3-NEXT: movd %esi, %xmm2 2864; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = 
xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 2865; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2866; SSSE3-NEXT: shlq $60, %rdi 2867; SSSE3-NEXT: sarq $63, %rdi 2868; SSSE3-NEXT: movd %edi, %xmm3 2869; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2870; SSSE3-NEXT: shrq $15, %rbp 2871; SSSE3-NEXT: movd %ebp, %xmm2 2872; SSSE3-NEXT: shrq $7, %rdx 2873; SSSE3-NEXT: movd %edx, %xmm5 2874; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 2875; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 2876; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 2877; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 2878; SSSE3-NEXT: popq %rbx 2879; SSSE3-NEXT: popq %r12 2880; SSSE3-NEXT: popq %r13 2881; SSSE3-NEXT: popq %r14 2882; SSSE3-NEXT: popq %r15 2883; SSSE3-NEXT: popq %rbp 2884; SSSE3-NEXT: retq 2885; 2886; SSE41-LABEL: load_sext_32i1_to_32i8: 2887; SSE41: # BB#0: # %entry 2888; SSE41-NEXT: movswq (%rdi), %rax 2889; SSE41-NEXT: movq %rax, %rcx 2890; SSE41-NEXT: shlq $62, %rcx 2891; SSE41-NEXT: sarq $63, %rcx 2892; SSE41-NEXT: movq %rax, %rdx 2893; SSE41-NEXT: shlq $63, %rdx 2894; SSE41-NEXT: sarq $63, %rdx 2895; SSE41-NEXT: movd %edx, %xmm0 2896; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2897; SSE41-NEXT: movq %rax, %rcx 2898; SSE41-NEXT: shlq $61, %rcx 2899; SSE41-NEXT: sarq $63, %rcx 2900; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2901; SSE41-NEXT: movq %rax, %rcx 2902; SSE41-NEXT: shlq $60, %rcx 2903; SSE41-NEXT: sarq $63, %rcx 2904; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2905; SSE41-NEXT: movq %rax, %rcx 2906; SSE41-NEXT: shlq $59, %rcx 2907; SSE41-NEXT: sarq $63, %rcx 2908; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2909; SSE41-NEXT: movq %rax, %rcx 2910; SSE41-NEXT: shlq $58, %rcx 2911; SSE41-NEXT: sarq $63, %rcx 2912; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2913; SSE41-NEXT: movq %rax, %rcx 2914; SSE41-NEXT: shlq $57, %rcx 2915; SSE41-NEXT: sarq $63, %rcx 2916; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2917; SSE41-NEXT: movsbq %al, %rcx 2918; SSE41-NEXT: shrq $7, %rcx 2919; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2920; SSE41-NEXT: movq %rax, %rcx 2921; SSE41-NEXT: shlq $55, %rcx 2922; SSE41-NEXT: sarq $63, %rcx 2923; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2924; SSE41-NEXT: movq %rax, %rcx 2925; SSE41-NEXT: shlq $54, %rcx 2926; SSE41-NEXT: sarq $63, %rcx 2927; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2928; SSE41-NEXT: movq %rax, %rcx 2929; SSE41-NEXT: shlq $53, %rcx 2930; SSE41-NEXT: sarq $63, %rcx 2931; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2932; SSE41-NEXT: movq %rax, %rcx 2933; SSE41-NEXT: shlq $52, %rcx 2934; SSE41-NEXT: sarq $63, %rcx 2935; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 2936; SSE41-NEXT: movq %rax, %rcx 2937; SSE41-NEXT: shlq $51, %rcx 2938; SSE41-NEXT: sarq $63, %rcx 2939; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 2940; SSE41-NEXT: movq %rax, %rcx 2941; SSE41-NEXT: 
shlq $50, %rcx 2942; SSE41-NEXT: sarq $63, %rcx 2943; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2944; SSE41-NEXT: movq %rax, %rcx 2945; SSE41-NEXT: shlq $49, %rcx 2946; SSE41-NEXT: sarq $63, %rcx 2947; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2948; SSE41-NEXT: shrq $15, %rax 2949; SSE41-NEXT: pinsrb $15, %eax, %xmm0 2950; SSE41-NEXT: movswq 2(%rdi), %rax 2951; SSE41-NEXT: movq %rax, %rcx 2952; SSE41-NEXT: shlq $62, %rcx 2953; SSE41-NEXT: sarq $63, %rcx 2954; SSE41-NEXT: movq %rax, %rdx 2955; SSE41-NEXT: shlq $63, %rdx 2956; SSE41-NEXT: sarq $63, %rdx 2957; SSE41-NEXT: movd %edx, %xmm1 2958; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 2959; SSE41-NEXT: movq %rax, %rcx 2960; SSE41-NEXT: shlq $61, %rcx 2961; SSE41-NEXT: sarq $63, %rcx 2962; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 2963; SSE41-NEXT: movq %rax, %rcx 2964; SSE41-NEXT: shlq $60, %rcx 2965; SSE41-NEXT: sarq $63, %rcx 2966; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 2967; SSE41-NEXT: movq %rax, %rcx 2968; SSE41-NEXT: shlq $59, %rcx 2969; SSE41-NEXT: sarq $63, %rcx 2970; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 2971; SSE41-NEXT: movq %rax, %rcx 2972; SSE41-NEXT: shlq $58, %rcx 2973; SSE41-NEXT: sarq $63, %rcx 2974; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 2975; SSE41-NEXT: movq %rax, %rcx 2976; SSE41-NEXT: shlq $57, %rcx 2977; SSE41-NEXT: sarq $63, %rcx 2978; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 2979; SSE41-NEXT: movsbq %al, %rcx 2980; SSE41-NEXT: shrq $7, %rcx 2981; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 2982; SSE41-NEXT: movq %rax, %rcx 2983; SSE41-NEXT: shlq $55, %rcx 2984; SSE41-NEXT: sarq $63, %rcx 2985; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 2986; SSE41-NEXT: movq %rax, %rcx 2987; SSE41-NEXT: shlq $54, %rcx 2988; SSE41-NEXT: sarq $63, %rcx 2989; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 2990; SSE41-NEXT: movq %rax, %rcx 2991; SSE41-NEXT: shlq $53, %rcx 2992; SSE41-NEXT: sarq $63, %rcx 2993; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 2994; SSE41-NEXT: movq %rax, %rcx 2995; SSE41-NEXT: shlq $52, %rcx 2996; SSE41-NEXT: sarq $63, %rcx 2997; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 2998; SSE41-NEXT: movq %rax, %rcx 2999; SSE41-NEXT: shlq $51, %rcx 3000; SSE41-NEXT: sarq $63, %rcx 3001; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3002; SSE41-NEXT: movq %rax, %rcx 3003; SSE41-NEXT: shlq $50, %rcx 3004; SSE41-NEXT: sarq $63, %rcx 3005; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3006; SSE41-NEXT: movq %rax, %rcx 3007; SSE41-NEXT: shlq $49, %rcx 3008; SSE41-NEXT: sarq $63, %rcx 3009; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3010; SSE41-NEXT: shrq $15, %rax 3011; SSE41-NEXT: pinsrb $15, %eax, %xmm1 3012; SSE41-NEXT: retq 3013; 3014; AVX1-LABEL: load_sext_32i1_to_32i8: 3015; AVX1: # BB#0: # %entry 3016; AVX1-NEXT: pushq %rbp 3017; AVX1-NEXT: pushq %r15 3018; AVX1-NEXT: pushq %r14 3019; AVX1-NEXT: pushq %r13 3020; AVX1-NEXT: pushq %r12 3021; AVX1-NEXT: pushq %rbx 3022; AVX1-NEXT: movslq (%rdi), %rax 3023; AVX1-NEXT: movq %rax, %rcx 3024; AVX1-NEXT: shlq $47, %rcx 3025; AVX1-NEXT: sarq $63, %rcx 3026; AVX1-NEXT: vmovd %ecx, %xmm0 3027; AVX1-NEXT: movq %rax, %r8 3028; AVX1-NEXT: movq %rax, %rdx 3029; AVX1-NEXT: movq %rax, %rcx 3030; AVX1-NEXT: movq %rax, %rdi 3031; AVX1-NEXT: movq %rax, %r13 3032; AVX1-NEXT: movq %rax, %rsi 3033; AVX1-NEXT: movq %rax, %r10 3034; AVX1-NEXT: movq %rax, %r11 3035; AVX1-NEXT: movq %rax, %r9 3036; AVX1-NEXT: movq %rax, %rbx 3037; AVX1-NEXT: movq %rax, %r14 3038; AVX1-NEXT: movq %rax, %r15 3039; AVX1-NEXT: movq %rax, %r12 3040; AVX1-NEXT: movq %rax, %rbp 3041; AVX1-NEXT: shlq $46, %rbp 3042; AVX1-NEXT: sarq $63, %rbp 3043; AVX1-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 3044; AVX1-NEXT: movq %rax, %rbp 3045; 
AVX1-NEXT: shlq $45, %r8 3046; AVX1-NEXT: sarq $63, %r8 3047; AVX1-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 3048; AVX1-NEXT: movq %rax, %r8 3049; AVX1-NEXT: shlq $44, %rdx 3050; AVX1-NEXT: sarq $63, %rdx 3051; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 3052; AVX1-NEXT: movq %rax, %rdx 3053; AVX1-NEXT: shlq $43, %rcx 3054; AVX1-NEXT: sarq $63, %rcx 3055; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 3056; AVX1-NEXT: movq %rax, %rcx 3057; AVX1-NEXT: shlq $42, %rdi 3058; AVX1-NEXT: sarq $63, %rdi 3059; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 3060; AVX1-NEXT: movq %rax, %rdi 3061; AVX1-NEXT: shlq $41, %r13 3062; AVX1-NEXT: sarq $63, %r13 3063; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 3064; AVX1-NEXT: movq %rax, %r13 3065; AVX1-NEXT: shlq $40, %rsi 3066; AVX1-NEXT: sarq $63, %rsi 3067; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 3068; AVX1-NEXT: movq %rax, %rsi 3069; AVX1-NEXT: shlq $39, %r10 3070; AVX1-NEXT: sarq $63, %r10 3071; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 3072; AVX1-NEXT: movq %rax, %r10 3073; AVX1-NEXT: shlq $38, %r11 3074; AVX1-NEXT: sarq $63, %r11 3075; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 3076; AVX1-NEXT: movsbq %al, %r11 3077; AVX1-NEXT: shlq $37, %r9 3078; AVX1-NEXT: sarq $63, %r9 3079; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 3080; AVX1-NEXT: movq %rax, %r9 3081; AVX1-NEXT: shlq $36, %rbx 3082; AVX1-NEXT: sarq $63, %rbx 3083; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 3084; AVX1-NEXT: movq %rax, %rbx 3085; AVX1-NEXT: shlq $35, %r14 3086; AVX1-NEXT: sarq $63, %r14 3087; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 3088; AVX1-NEXT: movq %rax, %r14 3089; AVX1-NEXT: shlq $34, %r15 3090; AVX1-NEXT: sarq $63, %r15 3091; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 3092; AVX1-NEXT: movq %rax, %r15 3093; AVX1-NEXT: shlq $33, %r12 3094; AVX1-NEXT: sarq $63, %r12 3095; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 3096; AVX1-NEXT: movq %rax, %r12 3097; AVX1-NEXT: shrq $31, %rbp 3098; AVX1-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 3099; AVX1-NEXT: movq %rax, %rbp 3100; AVX1-NEXT: shlq $63, %rdx 3101; AVX1-NEXT: sarq $63, %rdx 3102; AVX1-NEXT: vmovd %edx, %xmm1 3103; AVX1-NEXT: movq %rax, %rdx 3104; AVX1-NEXT: movswq %ax, %rax 3105; AVX1-NEXT: shlq $62, %r8 3106; AVX1-NEXT: sarq $63, %r8 3107; AVX1-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 3108; AVX1-NEXT: shlq $61, %rcx 3109; AVX1-NEXT: sarq $63, %rcx 3110; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 3111; AVX1-NEXT: shlq $60, %rdi 3112; AVX1-NEXT: sarq $63, %rdi 3113; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 3114; AVX1-NEXT: shlq $59, %r13 3115; AVX1-NEXT: sarq $63, %r13 3116; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 3117; AVX1-NEXT: shlq $58, %rsi 3118; AVX1-NEXT: sarq $63, %rsi 3119; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 3120; AVX1-NEXT: shlq $57, %r10 3121; AVX1-NEXT: sarq $63, %r10 3122; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 3123; AVX1-NEXT: shrq $7, %r11 3124; AVX1-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 3125; AVX1-NEXT: shlq $55, %r9 3126; AVX1-NEXT: sarq $63, %r9 3127; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 3128; AVX1-NEXT: shlq $54, %rbx 3129; AVX1-NEXT: sarq $63, %rbx 3130; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 3131; AVX1-NEXT: shlq $53, %r14 3132; AVX1-NEXT: sarq $63, %r14 3133; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 3134; AVX1-NEXT: shlq $52, %r15 3135; AVX1-NEXT: sarq $63, %r15 3136; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 3137; AVX1-NEXT: shlq $51, %r12 3138; AVX1-NEXT: sarq $63, %r12 3139; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 3140; AVX1-NEXT: shlq $50, %rbp 3141; AVX1-NEXT: sarq $63, %rbp 3142; 
AVX1-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 3143; AVX1-NEXT: shlq $49, %rdx 3144; AVX1-NEXT: sarq $63, %rdx 3145; AVX1-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 3146; AVX1-NEXT: shrq $15, %rax 3147; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 3148; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3149; AVX1-NEXT: popq %rbx 3150; AVX1-NEXT: popq %r12 3151; AVX1-NEXT: popq %r13 3152; AVX1-NEXT: popq %r14 3153; AVX1-NEXT: popq %r15 3154; AVX1-NEXT: popq %rbp 3155; AVX1-NEXT: retq 3156; 3157; AVX2-LABEL: load_sext_32i1_to_32i8: 3158; AVX2: # BB#0: # %entry 3159; AVX2-NEXT: pushq %rbp 3160; AVX2-NEXT: pushq %r15 3161; AVX2-NEXT: pushq %r14 3162; AVX2-NEXT: pushq %r13 3163; AVX2-NEXT: pushq %r12 3164; AVX2-NEXT: pushq %rbx 3165; AVX2-NEXT: movslq (%rdi), %rax 3166; AVX2-NEXT: movq %rax, %rcx 3167; AVX2-NEXT: shlq $47, %rcx 3168; AVX2-NEXT: sarq $63, %rcx 3169; AVX2-NEXT: vmovd %ecx, %xmm0 3170; AVX2-NEXT: movq %rax, %r8 3171; AVX2-NEXT: movq %rax, %rdx 3172; AVX2-NEXT: movq %rax, %rcx 3173; AVX2-NEXT: movq %rax, %rdi 3174; AVX2-NEXT: movq %rax, %r13 3175; AVX2-NEXT: movq %rax, %rsi 3176; AVX2-NEXT: movq %rax, %r10 3177; AVX2-NEXT: movq %rax, %r11 3178; AVX2-NEXT: movq %rax, %r9 3179; AVX2-NEXT: movq %rax, %rbx 3180; AVX2-NEXT: movq %rax, %r14 3181; AVX2-NEXT: movq %rax, %r15 3182; AVX2-NEXT: movq %rax, %r12 3183; AVX2-NEXT: movq %rax, %rbp 3184; AVX2-NEXT: shlq $46, %rbp 3185; AVX2-NEXT: sarq $63, %rbp 3186; AVX2-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 3187; AVX2-NEXT: movq %rax, %rbp 3188; AVX2-NEXT: shlq $45, %r8 3189; AVX2-NEXT: sarq $63, %r8 3190; AVX2-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 3191; AVX2-NEXT: movq %rax, %r8 3192; AVX2-NEXT: shlq $44, %rdx 3193; AVX2-NEXT: sarq $63, %rdx 3194; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 3195; AVX2-NEXT: movq %rax, %rdx 3196; AVX2-NEXT: shlq $43, %rcx 3197; AVX2-NEXT: sarq $63, %rcx 3198; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 3199; AVX2-NEXT: movq %rax, %rcx 3200; AVX2-NEXT: shlq $42, %rdi 3201; AVX2-NEXT: sarq $63, %rdi 3202; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 3203; AVX2-NEXT: movq %rax, %rdi 3204; AVX2-NEXT: shlq $41, %r13 3205; AVX2-NEXT: sarq $63, %r13 3206; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 3207; AVX2-NEXT: movq %rax, %r13 3208; AVX2-NEXT: shlq $40, %rsi 3209; AVX2-NEXT: sarq $63, %rsi 3210; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 3211; AVX2-NEXT: movq %rax, %rsi 3212; AVX2-NEXT: shlq $39, %r10 3213; AVX2-NEXT: sarq $63, %r10 3214; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 3215; AVX2-NEXT: movq %rax, %r10 3216; AVX2-NEXT: shlq $38, %r11 3217; AVX2-NEXT: sarq $63, %r11 3218; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 3219; AVX2-NEXT: movsbq %al, %r11 3220; AVX2-NEXT: shlq $37, %r9 3221; AVX2-NEXT: sarq $63, %r9 3222; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 3223; AVX2-NEXT: movq %rax, %r9 3224; AVX2-NEXT: shlq $36, %rbx 3225; AVX2-NEXT: sarq $63, %rbx 3226; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 3227; AVX2-NEXT: movq %rax, %rbx 3228; AVX2-NEXT: shlq $35, %r14 3229; AVX2-NEXT: sarq $63, %r14 3230; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 3231; AVX2-NEXT: movq %rax, %r14 3232; AVX2-NEXT: shlq $34, %r15 3233; AVX2-NEXT: sarq $63, %r15 3234; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 3235; AVX2-NEXT: movq %rax, %r15 3236; AVX2-NEXT: shlq $33, %r12 3237; AVX2-NEXT: sarq $63, %r12 3238; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 3239; AVX2-NEXT: movq %rax, %r12 3240; AVX2-NEXT: shrq $31, %rbp 3241; AVX2-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 3242; AVX2-NEXT: movq %rax, %rbp 3243; AVX2-NEXT: shlq $63, %rdx 3244; AVX2-NEXT: sarq 
$63, %rdx 3245; AVX2-NEXT: vmovd %edx, %xmm1 3246; AVX2-NEXT: movq %rax, %rdx 3247; AVX2-NEXT: movswq %ax, %rax 3248; AVX2-NEXT: shlq $62, %r8 3249; AVX2-NEXT: sarq $63, %r8 3250; AVX2-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 3251; AVX2-NEXT: shlq $61, %rcx 3252; AVX2-NEXT: sarq $63, %rcx 3253; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 3254; AVX2-NEXT: shlq $60, %rdi 3255; AVX2-NEXT: sarq $63, %rdi 3256; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 3257; AVX2-NEXT: shlq $59, %r13 3258; AVX2-NEXT: sarq $63, %r13 3259; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 3260; AVX2-NEXT: shlq $58, %rsi 3261; AVX2-NEXT: sarq $63, %rsi 3262; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 3263; AVX2-NEXT: shlq $57, %r10 3264; AVX2-NEXT: sarq $63, %r10 3265; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 3266; AVX2-NEXT: shrq $7, %r11 3267; AVX2-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 3268; AVX2-NEXT: shlq $55, %r9 3269; AVX2-NEXT: sarq $63, %r9 3270; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 3271; AVX2-NEXT: shlq $54, %rbx 3272; AVX2-NEXT: sarq $63, %rbx 3273; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 3274; AVX2-NEXT: shlq $53, %r14 3275; AVX2-NEXT: sarq $63, %r14 3276; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 3277; AVX2-NEXT: shlq $52, %r15 3278; AVX2-NEXT: sarq $63, %r15 3279; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 3280; AVX2-NEXT: shlq $51, %r12 3281; AVX2-NEXT: sarq $63, %r12 3282; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 3283; AVX2-NEXT: shlq $50, %rbp 3284; AVX2-NEXT: sarq $63, %rbp 3285; AVX2-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 3286; AVX2-NEXT: shlq $49, %rdx 3287; AVX2-NEXT: sarq $63, %rdx 3288; AVX2-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 3289; AVX2-NEXT: shrq $15, %rax 3290; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 3291; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 3292; AVX2-NEXT: popq %rbx 3293; AVX2-NEXT: popq %r12 3294; AVX2-NEXT: popq %r13 3295; AVX2-NEXT: popq %r14 3296; AVX2-NEXT: popq %r15 3297; AVX2-NEXT: popq %rbp 3298; AVX2-NEXT: retq 3299; 3300; X32-SSE41-LABEL: load_sext_32i1_to_32i8: 3301; X32-SSE41: # BB#0: # %entry 3302; X32-SSE41-NEXT: pushl %esi 3303; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3304; X32-SSE41-NEXT: movswl (%eax), %ecx 3305; X32-SSE41-NEXT: movl %ecx, %edx 3306; X32-SSE41-NEXT: shll $30, %edx 3307; X32-SSE41-NEXT: sarl $31, %edx 3308; X32-SSE41-NEXT: movl %ecx, %esi 3309; X32-SSE41-NEXT: shll $31, %esi 3310; X32-SSE41-NEXT: sarl $31, %esi 3311; X32-SSE41-NEXT: movd %esi, %xmm0 3312; X32-SSE41-NEXT: pinsrb $1, %edx, %xmm0 3313; X32-SSE41-NEXT: movl %ecx, %edx 3314; X32-SSE41-NEXT: shll $29, %edx 3315; X32-SSE41-NEXT: sarl $31, %edx 3316; X32-SSE41-NEXT: pinsrb $2, %edx, %xmm0 3317; X32-SSE41-NEXT: movl %ecx, %edx 3318; X32-SSE41-NEXT: shll $28, %edx 3319; X32-SSE41-NEXT: sarl $31, %edx 3320; X32-SSE41-NEXT: pinsrb $3, %edx, %xmm0 3321; X32-SSE41-NEXT: movl %ecx, %edx 3322; X32-SSE41-NEXT: shll $27, %edx 3323; X32-SSE41-NEXT: sarl $31, %edx 3324; X32-SSE41-NEXT: pinsrb $4, %edx, %xmm0 3325; X32-SSE41-NEXT: movl %ecx, %edx 3326; X32-SSE41-NEXT: shll $26, %edx 3327; X32-SSE41-NEXT: sarl $31, %edx 3328; X32-SSE41-NEXT: pinsrb $5, %edx, %xmm0 3329; X32-SSE41-NEXT: movl %ecx, %edx 3330; X32-SSE41-NEXT: shll $25, %edx 3331; X32-SSE41-NEXT: sarl $31, %edx 3332; X32-SSE41-NEXT: pinsrb $6, %edx, %xmm0 3333; X32-SSE41-NEXT: movsbl %cl, %edx 3334; X32-SSE41-NEXT: shrl $7, %edx 3335; X32-SSE41-NEXT: pinsrb $7, %edx, %xmm0 3336; X32-SSE41-NEXT: movl %ecx, %edx 3337; X32-SSE41-NEXT: shll $23, %edx 3338; X32-SSE41-NEXT: sarl $31, %edx 3339; X32-SSE41-NEXT: pinsrb $8, %edx, %xmm0 
3340; X32-SSE41-NEXT: movl %ecx, %edx 3341; X32-SSE41-NEXT: shll $22, %edx 3342; X32-SSE41-NEXT: sarl $31, %edx 3343; X32-SSE41-NEXT: pinsrb $9, %edx, %xmm0 3344; X32-SSE41-NEXT: movl %ecx, %edx 3345; X32-SSE41-NEXT: shll $21, %edx 3346; X32-SSE41-NEXT: sarl $31, %edx 3347; X32-SSE41-NEXT: pinsrb $10, %edx, %xmm0 3348; X32-SSE41-NEXT: movl %ecx, %edx 3349; X32-SSE41-NEXT: shll $20, %edx 3350; X32-SSE41-NEXT: sarl $31, %edx 3351; X32-SSE41-NEXT: pinsrb $11, %edx, %xmm0 3352; X32-SSE41-NEXT: movl %ecx, %edx 3353; X32-SSE41-NEXT: shll $19, %edx 3354; X32-SSE41-NEXT: sarl $31, %edx 3355; X32-SSE41-NEXT: pinsrb $12, %edx, %xmm0 3356; X32-SSE41-NEXT: movl %ecx, %edx 3357; X32-SSE41-NEXT: shll $18, %edx 3358; X32-SSE41-NEXT: sarl $31, %edx 3359; X32-SSE41-NEXT: pinsrb $13, %edx, %xmm0 3360; X32-SSE41-NEXT: movl %ecx, %edx 3361; X32-SSE41-NEXT: shll $17, %edx 3362; X32-SSE41-NEXT: sarl $31, %edx 3363; X32-SSE41-NEXT: pinsrb $14, %edx, %xmm0 3364; X32-SSE41-NEXT: shrl $15, %ecx 3365; X32-SSE41-NEXT: pinsrb $15, %ecx, %xmm0 3366; X32-SSE41-NEXT: movswl 2(%eax), %eax 3367; X32-SSE41-NEXT: movl %eax, %ecx 3368; X32-SSE41-NEXT: shll $30, %ecx 3369; X32-SSE41-NEXT: sarl $31, %ecx 3370; X32-SSE41-NEXT: movl %eax, %edx 3371; X32-SSE41-NEXT: shll $31, %edx 3372; X32-SSE41-NEXT: sarl $31, %edx 3373; X32-SSE41-NEXT: movd %edx, %xmm1 3374; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3375; X32-SSE41-NEXT: movl %eax, %ecx 3376; X32-SSE41-NEXT: shll $29, %ecx 3377; X32-SSE41-NEXT: sarl $31, %ecx 3378; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3379; X32-SSE41-NEXT: movl %eax, %ecx 3380; X32-SSE41-NEXT: shll $28, %ecx 3381; X32-SSE41-NEXT: sarl $31, %ecx 3382; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3383; X32-SSE41-NEXT: movl %eax, %ecx 3384; X32-SSE41-NEXT: shll $27, %ecx 3385; X32-SSE41-NEXT: sarl $31, %ecx 3386; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3387; X32-SSE41-NEXT: movl %eax, %ecx 3388; X32-SSE41-NEXT: shll $26, %ecx 3389; X32-SSE41-NEXT: sarl $31, %ecx 3390; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3391; X32-SSE41-NEXT: movl %eax, %ecx 3392; X32-SSE41-NEXT: shll $25, %ecx 3393; X32-SSE41-NEXT: sarl $31, %ecx 3394; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3395; X32-SSE41-NEXT: movsbl %al, %ecx 3396; X32-SSE41-NEXT: shrl $7, %ecx 3397; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3398; X32-SSE41-NEXT: movl %eax, %ecx 3399; X32-SSE41-NEXT: shll $23, %ecx 3400; X32-SSE41-NEXT: sarl $31, %ecx 3401; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3402; X32-SSE41-NEXT: movl %eax, %ecx 3403; X32-SSE41-NEXT: shll $22, %ecx 3404; X32-SSE41-NEXT: sarl $31, %ecx 3405; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3406; X32-SSE41-NEXT: movl %eax, %ecx 3407; X32-SSE41-NEXT: shll $21, %ecx 3408; X32-SSE41-NEXT: sarl $31, %ecx 3409; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3410; X32-SSE41-NEXT: movl %eax, %ecx 3411; X32-SSE41-NEXT: shll $20, %ecx 3412; X32-SSE41-NEXT: sarl $31, %ecx 3413; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3414; X32-SSE41-NEXT: movl %eax, %ecx 3415; X32-SSE41-NEXT: shll $19, %ecx 3416; X32-SSE41-NEXT: sarl $31, %ecx 3417; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3418; X32-SSE41-NEXT: movl %eax, %ecx 3419; X32-SSE41-NEXT: shll $18, %ecx 3420; X32-SSE41-NEXT: sarl $31, %ecx 3421; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3422; X32-SSE41-NEXT: movl %eax, %ecx 3423; X32-SSE41-NEXT: shll $17, %ecx 3424; X32-SSE41-NEXT: sarl $31, %ecx 3425; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3426; X32-SSE41-NEXT: shrl $15, %eax 3427; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1 3428; X32-SSE41-NEXT: popl %esi 3429; X32-SSE41-NEXT: retl 3430entry: 3431 %X = 
load <32 x i1>, <32 x i1>* %ptr 3432 %Y = sext <32 x i1> %X to <32 x i8> 3433 ret <32 x i8> %Y 3434} 3435 3436define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { 3437; SSE2-LABEL: load_sext_16i8_to_16i16: 3438; SSE2: # BB#0: # %entry 3439; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3440; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3441; SSE2-NEXT: psraw $8, %xmm0 3442; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3443; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3444; SSE2-NEXT: psraw $8, %xmm1 3445; SSE2-NEXT: retq 3446; 3447; SSSE3-LABEL: load_sext_16i8_to_16i16: 3448; SSSE3: # BB#0: # %entry 3449; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3450; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3451; SSSE3-NEXT: psraw $8, %xmm0 3452; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3453; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3454; SSSE3-NEXT: psraw $8, %xmm1 3455; SSSE3-NEXT: retq 3456; 3457; SSE41-LABEL: load_sext_16i8_to_16i16: 3458; SSE41: # BB#0: # %entry 3459; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 3460; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 3461; SSE41-NEXT: retq 3462; 3463; AVX1-LABEL: load_sext_16i8_to_16i16: 3464; AVX1: # BB#0: # %entry 3465; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 3466; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1 3467; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3468; AVX1-NEXT: retq 3469; 3470; AVX2-LABEL: load_sext_16i8_to_16i16: 3471; AVX2: # BB#0: # %entry 3472; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 3473; AVX2-NEXT: retq 3474; 3475; X32-SSE41-LABEL: load_sext_16i8_to_16i16: 3476; X32-SSE41: # BB#0: # %entry 3477; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3478; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 3479; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 3480; X32-SSE41-NEXT: retl 3481entry: 3482 %X = load <16 x i8>, <16 x i8>* %ptr 3483 %Y = sext <16 x i8> %X to <16 x i16> 3484 ret <16 x i16> %Y 3485} 3486 3487define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { 3488; SSE2-LABEL: load_sext_2i16_to_2i64: 3489; SSE2: # BB#0: # %entry 3490; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3491; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3492; SSE2-NEXT: movdqa %xmm0, %xmm1 3493; SSE2-NEXT: psrad $31, %xmm1 3494; SSE2-NEXT: psrad $16, %xmm0 3495; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3496; SSE2-NEXT: retq 3497; 3498; SSSE3-LABEL: load_sext_2i16_to_2i64: 3499; SSSE3: # BB#0: # %entry 3500; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3501; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3502; SSSE3-NEXT: movdqa %xmm0, %xmm1 3503; SSSE3-NEXT: psrad $31, %xmm1 3504; SSSE3-NEXT: psrad $16, %xmm0 3505; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3506; SSSE3-NEXT: retq 3507; 3508; SSE41-LABEL: load_sext_2i16_to_2i64: 3509; SSE41: # BB#0: # %entry 3510; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 3511; SSE41-NEXT: retq 3512; 3513; AVX-LABEL: load_sext_2i16_to_2i64: 3514; AVX: # BB#0: # %entry 3515; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 3516; AVX-NEXT: retq 3517; 3518; X32-SSE41-LABEL: load_sext_2i16_to_2i64: 3519; X32-SSE41: # BB#0: # %entry 3520; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3521; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 3522; X32-SSE41-NEXT: retl 3523entry: 3524 %X = load <2 x i16>, <2 x i16>* %ptr 3525 %Y = sext <2 x i16> %X to <2 x i64> 3526 ret <2 x i64> %Y 3527} 3528 3529define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { 3530; SSE2-LABEL: 
load_sext_4i16_to_4i32: 3531; SSE2: # BB#0: # %entry 3532; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3533; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3534; SSE2-NEXT: psrad $16, %xmm0 3535; SSE2-NEXT: retq 3536; 3537; SSSE3-LABEL: load_sext_4i16_to_4i32: 3538; SSSE3: # BB#0: # %entry 3539; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3540; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3541; SSSE3-NEXT: psrad $16, %xmm0 3542; SSSE3-NEXT: retq 3543; 3544; SSE41-LABEL: load_sext_4i16_to_4i32: 3545; SSE41: # BB#0: # %entry 3546; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 3547; SSE41-NEXT: retq 3548; 3549; AVX-LABEL: load_sext_4i16_to_4i32: 3550; AVX: # BB#0: # %entry 3551; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 3552; AVX-NEXT: retq 3553; 3554; X32-SSE41-LABEL: load_sext_4i16_to_4i32: 3555; X32-SSE41: # BB#0: # %entry 3556; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3557; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 3558; X32-SSE41-NEXT: retl 3559entry: 3560 %X = load <4 x i16>, <4 x i16>* %ptr 3561 %Y = sext <4 x i16> %X to <4 x i32> 3562 ret <4 x i32> %Y 3563} 3564 3565define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { 3566; SSE2-LABEL: load_sext_4i16_to_4i64: 3567; SSE2: # BB#0: # %entry 3568; SSE2-NEXT: movswq 2(%rdi), %rax 3569; SSE2-NEXT: movd %rax, %xmm1 3570; SSE2-NEXT: movswq (%rdi), %rax 3571; SSE2-NEXT: movd %rax, %xmm0 3572; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3573; SSE2-NEXT: movswq 6(%rdi), %rax 3574; SSE2-NEXT: movd %rax, %xmm2 3575; SSE2-NEXT: movswq 4(%rdi), %rax 3576; SSE2-NEXT: movd %rax, %xmm1 3577; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3578; SSE2-NEXT: retq 3579; 3580; SSSE3-LABEL: load_sext_4i16_to_4i64: 3581; SSSE3: # BB#0: # %entry 3582; SSSE3-NEXT: movswq 2(%rdi), %rax 3583; SSSE3-NEXT: movd %rax, %xmm1 3584; SSSE3-NEXT: movswq (%rdi), %rax 3585; SSSE3-NEXT: movd %rax, %xmm0 3586; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3587; SSSE3-NEXT: movswq 6(%rdi), %rax 3588; SSSE3-NEXT: movd %rax, %xmm2 3589; SSSE3-NEXT: movswq 4(%rdi), %rax 3590; SSSE3-NEXT: movd %rax, %xmm1 3591; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3592; SSSE3-NEXT: retq 3593; 3594; SSE41-LABEL: load_sext_4i16_to_4i64: 3595; SSE41: # BB#0: # %entry 3596; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 3597; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1 3598; SSE41-NEXT: retq 3599; 3600; AVX1-LABEL: load_sext_4i16_to_4i64: 3601; AVX1: # BB#0: # %entry 3602; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0 3603; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 3604; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3605; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 3606; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3607; AVX1-NEXT: retq 3608; 3609; AVX2-LABEL: load_sext_4i16_to_4i64: 3610; AVX2: # BB#0: # %entry 3611; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0 3612; AVX2-NEXT: retq 3613; 3614; X32-SSE41-LABEL: load_sext_4i16_to_4i64: 3615; X32-SSE41: # BB#0: # %entry 3616; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3617; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 3618; X32-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1 3619; X32-SSE41-NEXT: retl 3620entry: 3621 %X = load <4 x i16>, <4 x i16>* %ptr 3622 %Y = sext <4 x i16> %X to <4 x i64> 3623 ret <4 x i64> %Y 3624} 3625 3626define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) { 3627; SSE2-LABEL: load_sext_8i16_to_8i32: 3628; SSE2: # BB#0: # %entry 3629; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3630; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3631; SSE2-NEXT: psrad $16, %xmm0 3632; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3633; 
SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 3634; SSE2-NEXT: psrad $16, %xmm1 3635; SSE2-NEXT: retq 3636; 3637; SSSE3-LABEL: load_sext_8i16_to_8i32: 3638; SSSE3: # BB#0: # %entry 3639; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3640; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3641; SSSE3-NEXT: psrad $16, %xmm0 3642; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3643; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 3644; SSSE3-NEXT: psrad $16, %xmm1 3645; SSSE3-NEXT: retq 3646; 3647; SSE41-LABEL: load_sext_8i16_to_8i32: 3648; SSE41: # BB#0: # %entry 3649; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 3650; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 3651; SSE41-NEXT: retq 3652; 3653; AVX1-LABEL: load_sext_8i16_to_8i32: 3654; AVX1: # BB#0: # %entry 3655; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0 3656; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1 3657; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3658; AVX1-NEXT: retq 3659; 3660; AVX2-LABEL: load_sext_8i16_to_8i32: 3661; AVX2: # BB#0: # %entry 3662; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 3663; AVX2-NEXT: retq 3664; 3665; X32-SSE41-LABEL: load_sext_8i16_to_8i32: 3666; X32-SSE41: # BB#0: # %entry 3667; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3668; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 3669; X32-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1 3670; X32-SSE41-NEXT: retl 3671entry: 3672 %X = load <8 x i16>, <8 x i16>* %ptr 3673 %Y = sext <8 x i16> %X to <8 x i32> 3674 ret <8 x i32> %Y 3675} 3676 3677define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) { 3678; SSE2-LABEL: load_sext_2i32_to_2i64: 3679; SSE2: # BB#0: # %entry 3680; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3681; SSE2-NEXT: movdqa %xmm0, %xmm1 3682; SSE2-NEXT: psrad $31, %xmm1 3683; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3684; SSE2-NEXT: retq 3685; 3686; SSSE3-LABEL: load_sext_2i32_to_2i64: 3687; SSSE3: # BB#0: # %entry 3688; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3689; SSSE3-NEXT: movdqa %xmm0, %xmm1 3690; SSSE3-NEXT: psrad $31, %xmm1 3691; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3692; SSSE3-NEXT: retq 3693; 3694; SSE41-LABEL: load_sext_2i32_to_2i64: 3695; SSE41: # BB#0: # %entry 3696; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3697; SSE41-NEXT: retq 3698; 3699; AVX-LABEL: load_sext_2i32_to_2i64: 3700; AVX: # BB#0: # %entry 3701; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 3702; AVX-NEXT: retq 3703; 3704; X32-SSE41-LABEL: load_sext_2i32_to_2i64: 3705; X32-SSE41: # BB#0: # %entry 3706; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3707; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3708; X32-SSE41-NEXT: retl 3709entry: 3710 %X = load <2 x i32>, <2 x i32>* %ptr 3711 %Y = sext <2 x i32> %X to <2 x i64> 3712 ret <2 x i64> %Y 3713} 3714 3715define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) { 3716; SSE2-LABEL: load_sext_4i32_to_4i64: 3717; SSE2: # BB#0: # %entry 3718; SSE2-NEXT: movdqa (%rdi), %xmm0 3719; SSE2-NEXT: movdqa %xmm0, %xmm2 3720; SSE2-NEXT: psrad $31, %xmm2 3721; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 3722; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3723; SSE2-NEXT: movdqa %xmm1, %xmm2 3724; SSE2-NEXT: psrad $31, %xmm2 3725; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3726; SSE2-NEXT: retq 3727; 3728; SSSE3-LABEL: load_sext_4i32_to_4i64: 3729; SSSE3: # BB#0: # %entry 3730; SSSE3-NEXT: movdqa (%rdi), %xmm0 3731; SSSE3-NEXT: movdqa %xmm0, %xmm2 3732; SSSE3-NEXT: psrad $31, %xmm2 3733; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 3734; SSSE3-NEXT: punpckldq 
{{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3735; SSSE3-NEXT: movdqa %xmm1, %xmm2 3736; SSSE3-NEXT: psrad $31, %xmm2 3737; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3738; SSSE3-NEXT: retq 3739; 3740; SSE41-LABEL: load_sext_4i32_to_4i64: 3741; SSE41: # BB#0: # %entry 3742; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3743; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1 3744; SSE41-NEXT: retq 3745; 3746; AVX1-LABEL: load_sext_4i32_to_4i64: 3747; AVX1: # BB#0: # %entry 3748; AVX1-NEXT: vpmovsxdq (%rdi), %xmm0 3749; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm1 3750; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3751; AVX1-NEXT: retq 3752; 3753; AVX2-LABEL: load_sext_4i32_to_4i64: 3754; AVX2: # BB#0: # %entry 3755; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0 3756; AVX2-NEXT: retq 3757; 3758; X32-SSE41-LABEL: load_sext_4i32_to_4i64: 3759; X32-SSE41: # BB#0: # %entry 3760; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3761; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3762; X32-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1 3763; X32-SSE41-NEXT: retl 3764entry: 3765 %X = load <4 x i32>, <4 x i32>* %ptr 3766 %Y = sext <4 x i32> %X to <4 x i64> 3767 ret <4 x i64> %Y 3768} 3769 3770define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { 3771; SSE2-LABEL: sext_2i8_to_i32: 3772; SSE2: # BB#0: # %entry 3773; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3774; SSE2-NEXT: psraw $8, %xmm0 3775; SSE2-NEXT: movd %xmm0, %eax 3776; SSE2-NEXT: retq 3777; 3778; SSSE3-LABEL: sext_2i8_to_i32: 3779; SSSE3: # BB#0: # %entry 3780; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3781; SSSE3-NEXT: psraw $8, %xmm0 3782; SSSE3-NEXT: movd %xmm0, %eax 3783; SSSE3-NEXT: retq 3784; 3785; SSE41-LABEL: sext_2i8_to_i32: 3786; SSE41: # BB#0: # %entry 3787; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3788; SSE41-NEXT: movd %xmm0, %eax 3789; SSE41-NEXT: retq 3790; 3791; AVX-LABEL: sext_2i8_to_i32: 3792; AVX: # BB#0: # %entry 3793; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 3794; AVX-NEXT: vmovd %xmm0, %eax 3795; AVX-NEXT: retq 3796; 3797; X32-SSE41-LABEL: sext_2i8_to_i32: 3798; X32-SSE41: # BB#0: # %entry 3799; X32-SSE41-NEXT: pushl %eax 3800; X32-SSE41-NEXT: .Ltmp0: 3801; X32-SSE41-NEXT: .cfi_def_cfa_offset 8 3802; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3803; X32-SSE41-NEXT: movd %xmm0, %eax 3804; X32-SSE41-NEXT: popl %ecx 3805; X32-SSE41-NEXT: retl 3806entry: 3807 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 3808 %Ex = sext <2 x i8> %Shuf to <2 x i16> 3809 %Bc = bitcast <2 x i16> %Ex to i32 3810 ret i32 %Bc 3811} 3812 3813define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { 3814; SSE2-LABEL: sext_4i1_to_4i64: 3815; SSE2: # BB#0: 3816; SSE2-NEXT: pslld $31, %xmm0 3817; SSE2-NEXT: psrad $31, %xmm0 3818; SSE2-NEXT: movdqa %xmm0, %xmm2 3819; SSE2-NEXT: psrad $31, %xmm2 3820; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 3821; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3822; SSE2-NEXT: movdqa %xmm1, %xmm2 3823; SSE2-NEXT: psrad $31, %xmm2 3824; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3825; SSE2-NEXT: retq 3826; 3827; SSSE3-LABEL: sext_4i1_to_4i64: 3828; SSSE3: # BB#0: 3829; SSSE3-NEXT: pslld $31, %xmm0 3830; SSSE3-NEXT: psrad $31, %xmm0 3831; SSSE3-NEXT: movdqa %xmm0, %xmm2 3832; SSSE3-NEXT: psrad $31, %xmm2 3833; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 3834; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3835; SSSE3-NEXT: movdqa %xmm1, %xmm2 3836; SSSE3-NEXT: psrad $31, %xmm2 3837; 
SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3838; SSSE3-NEXT: retq 3839; 3840; SSE41-LABEL: sext_4i1_to_4i64: 3841; SSE41: # BB#0: 3842; SSE41-NEXT: pslld $31, %xmm0 3843; SSE41-NEXT: psrad $31, %xmm0 3844; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3845; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3846; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3847; SSE41-NEXT: movdqa %xmm2, %xmm0 3848; SSE41-NEXT: retq 3849; 3850; AVX1-LABEL: sext_4i1_to_4i64: 3851; AVX1: # BB#0: 3852; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 3853; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 3854; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 3855; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3856; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 3857; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3858; AVX1-NEXT: retq 3859; 3860; AVX2-LABEL: sext_4i1_to_4i64: 3861; AVX2: # BB#0: 3862; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 3863; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 3864; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 3865; AVX2-NEXT: retq 3866; 3867; X32-SSE41-LABEL: sext_4i1_to_4i64: 3868; X32-SSE41: # BB#0: 3869; X32-SSE41-NEXT: pslld $31, %xmm0 3870; X32-SSE41-NEXT: psrad $31, %xmm0 3871; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3872; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3873; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3874; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 3875; X32-SSE41-NEXT: retl 3876 %extmask = sext <4 x i1> %mask to <4 x i64> 3877 ret <4 x i64> %extmask 3878} 3879 3880define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { 3881; SSE2-LABEL: sext_4i8_to_4i64: 3882; SSE2: # BB#0: 3883; SSE2-NEXT: pslld $24, %xmm0 3884; SSE2-NEXT: psrad $24, %xmm0 3885; SSE2-NEXT: movdqa %xmm0, %xmm2 3886; SSE2-NEXT: psrad $31, %xmm2 3887; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 3888; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3889; SSE2-NEXT: movdqa %xmm1, %xmm2 3890; SSE2-NEXT: psrad $31, %xmm2 3891; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3892; SSE2-NEXT: retq 3893; 3894; SSSE3-LABEL: sext_4i8_to_4i64: 3895; SSSE3: # BB#0: 3896; SSSE3-NEXT: pslld $24, %xmm0 3897; SSSE3-NEXT: psrad $24, %xmm0 3898; SSSE3-NEXT: movdqa %xmm0, %xmm2 3899; SSSE3-NEXT: psrad $31, %xmm2 3900; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 3901; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3902; SSSE3-NEXT: movdqa %xmm1, %xmm2 3903; SSSE3-NEXT: psrad $31, %xmm2 3904; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3905; SSSE3-NEXT: retq 3906; 3907; SSE41-LABEL: sext_4i8_to_4i64: 3908; SSE41: # BB#0: 3909; SSE41-NEXT: pslld $24, %xmm0 3910; SSE41-NEXT: psrad $24, %xmm0 3911; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3912; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3913; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3914; SSE41-NEXT: movdqa %xmm2, %xmm0 3915; SSE41-NEXT: retq 3916; 3917; AVX1-LABEL: sext_4i8_to_4i64: 3918; AVX1: # BB#0: 3919; AVX1-NEXT: vpslld $24, %xmm0, %xmm0 3920; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0 3921; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 3922; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3923; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 3924; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3925; AVX1-NEXT: retq 3926; 3927; AVX2-LABEL: sext_4i8_to_4i64: 3928; AVX2: # BB#0: 3929; AVX2-NEXT: vpslld $24, %xmm0, %xmm0 3930; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0 3931; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 3932; AVX2-NEXT: retq 3933; 3934; X32-SSE41-LABEL: sext_4i8_to_4i64: 3935; X32-SSE41: # BB#0: 3936; X32-SSE41-NEXT: pslld $24, %xmm0 3937; X32-SSE41-NEXT: psrad $24, %xmm0 3938; 
X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3939; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 3940; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3941; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 3942; X32-SSE41-NEXT: retl 3943 %extmask = sext <4 x i8> %mask to <4 x i64> 3944 ret <4 x i64> %extmask 3945} 3946