; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ
;
; 32-bit tests to make sure we're not doing anything stupid.
; (These i686 invocations are not piped to FileCheck, so their output is not
; compared against any of the assertions below.)
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1

;
; Integer to Float (v2i32 -> v2f32)
;

; Signed conversion of <2 x i32> to <2 x float>.
define <2 x float> @sitofp_2i32_to_2f32(<2 x i32> %a) {
; SSE-LABEL: sitofp_2i32_to_2f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_2i32_to_2f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = sitofp <2 x i32> %a to <2 x float>
  ret <2 x float> %cvt
}

; Unsigned conversion of <2 x i32> to <2 x float>.
define <2 x float> @uitofp_2i32_to_2f32(<2 x i32> %a) {
; SSE2-LABEL: uitofp_2i32_to_2f32:
; SSE2: # %bb.0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE2-NEXT: orpd %xmm1, %xmm0
; SSE2-NEXT: subpd %xmm1, %xmm0
; SSE2-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_2i32_to_2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_2i32_to_2f32:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0
; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; VEX-NEXT: vcvtpd2ps %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i32_to_2f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i32_to_2f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i32_to_2f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: uitofp_2i32_to_2f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %cvt = uitofp <2 x i32> %a to <2 x float>
  ret <2 x float> %cvt
}

;
; Signed Integer to Double
;

; Signed conversion of <2 x i64> to <2 x double>.
define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
; SSE2-LABEL: sitofp_2i64_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_2i64_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pextrq $1, %xmm0, %rax
; SSE41-NEXT: cvtsi2sd %rax, %xmm1
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_2i64_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %cvt = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %cvt
}

; Extracts lanes 0-1 of <4 x i32> first, then converts them (signed) to <2 x double>.
define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
; SSE-LABEL: sitofp_2i32_to_2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_2i32_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %shuf to <2 x double>
  ret <2 x double> %cvt
}

; Converts all four i32 lanes (signed) to double, then keeps lanes 0-1 of the result.
define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
; SSE-LABEL: sitofp_4i32_to_2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_4i32_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = sitofp <4 x i32> %a to <4 x double>
  %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

; Extracts lanes 0-1 of <8 x i16> first, then converts them (signed) to <2 x double>.
define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
; SSE2-LABEL: sitofp_2i16_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_2i16_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sitofp_2i16_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i16> %shuf to <2 x double>
  ret <2 x double> %cvt
}

; Converts all eight i16 lanes (signed) to double, then keeps lanes 0-1 of the result.
define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
; SSE2-LABEL: sitofp_8i16_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_8i16_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_8i16_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovsxwd %xmm0, %xmm0
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512-LABEL: sitofp_8i16_to_2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = sitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

; Extracts lanes 0-1 of <16 x i8> first, then converts them (signed) to <2 x double>.
define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
; SSE2-LABEL: sitofp_2i8_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_2i8_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sitofp_2i8_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i8> %shuf to <2 x double>
  ret <2 x double> %cvt
}

; Converts all sixteen i8 lanes (signed) to double, then keeps lanes 0-1 of the result.
define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
; SSE2-LABEL: sitofp_16i8_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_16i8_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_16i8_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovsxbd %xmm0, %xmm0
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512-LABEL: sitofp_16i8_to_2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = sitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

; Signed conversion of <4 x i64> to <4 x double>.
define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE2-LABEL: sitofp_4i64_to_4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT: movq %xmm1, %rax
; SSE2-NEXT: cvtsi2sd %rax, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pextrq $1, %xmm0, %rax
; SSE41-NEXT: cvtsi2sd %rax, %xmm2
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2sd %rax, %xmm2
; SSE41-NEXT: movq %xmm1, %rax
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: cvtsi2sd %rax, %xmm1
; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sitofp_4i64_to_4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %cvt = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %cvt
}

; Signed conversion of <4 x i32> to <4 x double>.
define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
; SSE-LABEL: sitofp_4i32_to_4f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_4i32_to_4f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
  %cvt = sitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %cvt
}

; Extracts lanes 0-3 of <8 x i16> first, then converts them (signed) to <4 x double>.
define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
; SSE2-LABEL: sitofp_4i16_to_4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i16_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX-LABEL: sitofp_4i16_to_4f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i16> %shuf to <4 x double>
  ret <4 x double> %cvt
}

; Converts all eight i16 lanes (signed) to double, then keeps lanes 0-3 of the result.
define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
; SSE2-LABEL: sitofp_8i16_to_4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_8i16_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_8i16_to_4f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovsxwd %xmm0, %xmm0
; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
; VEX-NEXT: retq
;
; AVX512-LABEL: sitofp_8i16_to_4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
  %cvt = sitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}

; Extracts lanes 0-3 of <16 x i8> first, then converts them (signed) to <4 x double>.
define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
; SSE2-LABEL: sitofp_4i8_to_4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i8_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX-LABEL: sitofp_4i8_to_4f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = sitofp <4 x i8> %shuf to <4 x double>
  ret <4 x double> %cvt
}

; Converts all sixteen i8 lanes (signed) to double, then keeps lanes 0-3 of the result.
define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
; SSE2-LABEL: sitofp_16i8_to_4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_16i8_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_16i8_to_4f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovsxbd %xmm0, %xmm0
; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
; VEX-NEXT: retq
;
; AVX512-LABEL: sitofp_16i8_to_4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
  %cvt = sitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %shuf
}

;
; Unsigned Integer to Double
;

; Unsigned conversion of <2 x i64> to <2 x double>.
define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
; SSE2-LABEL: uitofp_2i64_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: por {{.*}}(%rip), %xmm1
; SSE2-NEXT: psrlq $32, %xmm0
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
; SSE2-NEXT: subpd {{.*}}(%rip), %xmm0
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_2i64_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: por {{.*}}(%rip), %xmm1
; SSE41-NEXT: psrlq $32, %xmm0
; SSE41-NEXT: por {{.*}}(%rip), %xmm0
; SSE41-NEXT: subpd {{.*}}(%rip), %xmm0
; SSE41-NEXT: addpd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: uitofp_2i64_to_2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_2i64_to_2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %cvt = uitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %cvt
}

; Extracts lanes 0-1 of <4 x i32> first, then converts them (unsigned) to <2 x double>.
define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
; SSE2-LABEL: uitofp_2i32_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE2-NEXT: orpd %xmm1, %xmm0
; SSE2-NEXT: subpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_2i32_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_2i32_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0
; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i32_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i32_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i32_to_2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i32> %shuf to <2 x double>
  ret <2 x double> %cvt
}

; Converts all four i32 lanes (unsigned) to double, then keeps lanes 0-1 of the result.
define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
; SSE2-LABEL: uitofp_4i32_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: xorpd %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE2-NEXT: orpd %xmm1, %xmm0
; SSE2-NEXT: subpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i32_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: uitofp_4i32_to_2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_4i32_to_2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i32_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i32_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i32_to_2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %cvt = uitofp <4 x i32> %a to <4 x double>
  %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

; Extracts lanes 0-1 of <8 x i16> first, then converts them (unsigned) to <2 x double>.
define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
; SSE2-LABEL: uitofp_2i16_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_2i16_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uitofp_2i16_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i16> %shuf to <2 x double>
  ret <2 x double> %cvt
}

; Converts all eight i16 lanes (unsigned) to double, then keeps lanes 0-1 of the result.
define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
; SSE2-LABEL: uitofp_8i16_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_8i16_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_8i16_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512-LABEL: uitofp_8i16_to_2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = uitofp <8 x i16> %a to <8 x double>
  %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

; Extracts lanes 0-1 of <16 x i8> first, then converts them (unsigned) to <2 x double>.
define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
; SSE2-LABEL: uitofp_2i8_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_2i8_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: uitofp_2i8_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %cvt = uitofp <2 x i8> %shuf to <2 x double>
  ret <2 x double> %cvt
}

; Converts all sixteen i8 lanes (unsigned) to double, then keeps lanes 0-1 of the result.
define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; SSE2-LABEL: uitofp_16i8_to_2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_16i8_to_2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_16i8_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512-LABEL: uitofp_16i8_to_2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %cvt = uitofp <16 x i8> %a to <16 x double>
  %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuf
}

define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: psrlq $32, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT: subpd %xmm6, %xmm0
; SSE2-NEXT: addpd %xmm3, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: por %xmm4, %xmm2
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: subpd %xmm6, %xmm1
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE41-NEXT: por %xmm4, %xmm3
; SSE41-NEXT: psrlq $32, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE41-NEXT: subpd %xmm6, %xmm0
; SSE41-NEXT: addpd %xmm3, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm4, %xmm2
; SSE41-NEXT: psrlq $32, %xmm1
; SSE41-NEXT: por %xmm5, %xmm1
; SSE41-NEXT: subpd %xmm6, %xmm1
; SSE41-NEXT: addpd %xmm2, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: vorps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vorpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_4i64_to_4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
;
AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 966; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 967; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 968; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0 969; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0 970; AVX512F-NEXT: retq 971; 972; AVX512VL-LABEL: uitofp_4i64_to_4f64: 973; AVX512VL: # %bb.0: 974; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 975; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 976; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm1, %ymm1 977; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 978; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm0, %ymm0 979; AVX512VL-NEXT: vsubpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 980; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 981; AVX512VL-NEXT: retq 982; 983; AVX512DQ-LABEL: uitofp_4i64_to_4f64: 984; AVX512DQ: # %bb.0: 985; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 986; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 987; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 988; AVX512DQ-NEXT: retq 989; 990; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64: 991; AVX512VLDQ: # %bb.0: 992; AVX512VLDQ-NEXT: vcvtuqq2pd %ymm0, %ymm0 993; AVX512VLDQ-NEXT: retq 994 %cvt = uitofp <4 x i64> %a to <4 x double> 995 ret <4 x double> %cvt 996} 997 998define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) { 999; SSE2-LABEL: uitofp_4i32_to_4f64: 1000; SSE2: # %bb.0: 1001; SSE2-NEXT: movapd %xmm0, %xmm1 1002; SSE2-NEXT: xorpd %xmm2, %xmm2 1003; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1004; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] 1005; SSE2-NEXT: orpd %xmm3, %xmm0 1006; SSE2-NEXT: subpd %xmm3, %xmm0 1007; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1008; SSE2-NEXT: orpd %xmm3, %xmm1 1009; SSE2-NEXT: subpd 
%xmm3, %xmm1 1010; SSE2-NEXT: retq 1011; 1012; SSE41-LABEL: uitofp_4i32_to_4f64: 1013; SSE41: # %bb.0: 1014; SSE41-NEXT: movdqa %xmm0, %xmm1 1015; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1016; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] 1017; SSE41-NEXT: por %xmm2, %xmm0 1018; SSE41-NEXT: subpd %xmm2, %xmm0 1019; SSE41-NEXT: pxor %xmm3, %xmm3 1020; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 1021; SSE41-NEXT: por %xmm2, %xmm1 1022; SSE41-NEXT: subpd %xmm2, %xmm1 1023; SSE41-NEXT: retq 1024; 1025; AVX1-LABEL: uitofp_4i32_to_4f64: 1026; AVX1: # %bb.0: 1027; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1028; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1029; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1030; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1031; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 1032; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 1033; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 1034; AVX1-NEXT: retq 1035; 1036; AVX2-LABEL: uitofp_4i32_to_4f64: 1037; AVX2: # %bb.0: 1038; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1039; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 1040; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1041; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 1042; AVX2-NEXT: retq 1043; 1044; AVX512F-LABEL: uitofp_4i32_to_4f64: 1045; AVX512F: # %bb.0: 1046; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1047; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 1048; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1049; AVX512F-NEXT: retq 1050; 1051; AVX512VL-LABEL: uitofp_4i32_to_4f64: 1052; AVX512VL: # %bb.0: 1053; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 1054; AVX512VL-NEXT: retq 1055; 1056; AVX512DQ-LABEL: uitofp_4i32_to_4f64: 1057; 
AVX512DQ: # %bb.0: 1058; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1059; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 1060; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1061; AVX512DQ-NEXT: retq 1062; 1063; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64: 1064; AVX512VLDQ: # %bb.0: 1065; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 1066; AVX512VLDQ-NEXT: retq 1067 %cvt = uitofp <4 x i32> %a to <4 x double> 1068 ret <4 x double> %cvt 1069} 1070 1071define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) { 1072; SSE2-LABEL: uitofp_4i16_to_4f64: 1073; SSE2: # %bb.0: 1074; SSE2-NEXT: pxor %xmm1, %xmm1 1075; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1076; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1077; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1078; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1079; SSE2-NEXT: movaps %xmm2, %xmm0 1080; SSE2-NEXT: retq 1081; 1082; SSE41-LABEL: uitofp_4i16_to_4f64: 1083; SSE41: # %bb.0: 1084; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1085; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1086; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1087; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1088; SSE41-NEXT: retq 1089; 1090; AVX-LABEL: uitofp_4i16_to_4f64: 1091; AVX: # %bb.0: 1092; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1093; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 1094; AVX-NEXT: retq 1095 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1096 %cvt = uitofp <4 x i16> %shuf to <4 x double> 1097 ret <4 x double> %cvt 1098} 1099 1100define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) { 1101; SSE2-LABEL: uitofp_8i16_to_4f64: 1102; SSE2: # %bb.0: 1103; SSE2-NEXT: pxor %xmm1, %xmm1 1104; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1105; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1106; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1107; 
SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1108; SSE2-NEXT: movaps %xmm2, %xmm0 1109; SSE2-NEXT: retq 1110; 1111; SSE41-LABEL: uitofp_8i16_to_4f64: 1112; SSE41: # %bb.0: 1113; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1114; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1115; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1116; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1117; SSE41-NEXT: retq 1118; 1119; VEX-LABEL: uitofp_8i16_to_4f64: 1120; VEX: # %bb.0: 1121; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1122; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 1123; VEX-NEXT: retq 1124; 1125; AVX512-LABEL: uitofp_8i16_to_4f64: 1126; AVX512: # %bb.0: 1127; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1128; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 1129; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1130; AVX512-NEXT: retq 1131 %cvt = uitofp <8 x i16> %a to <8 x double> 1132 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1133 ret <4 x double> %shuf 1134} 1135 1136define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) { 1137; SSE2-LABEL: uitofp_4i8_to_4f64: 1138; SSE2: # %bb.0: 1139; SSE2-NEXT: pxor %xmm1, %xmm1 1140; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1141; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1142; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1143; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1144; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1145; SSE2-NEXT: movaps %xmm2, %xmm0 1146; SSE2-NEXT: retq 1147; 1148; SSE41-LABEL: uitofp_4i8_to_4f64: 1149; SSE41: # %bb.0: 1150; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 
1151; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1152; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1153; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1154; SSE41-NEXT: retq 1155; 1156; AVX-LABEL: uitofp_4i8_to_4f64: 1157; AVX: # %bb.0: 1158; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1159; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 1160; AVX-NEXT: retq 1161 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1162 %cvt = uitofp <4 x i8> %shuf to <4 x double> 1163 ret <4 x double> %cvt 1164} 1165 1166define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) { 1167; SSE2-LABEL: uitofp_16i8_to_4f64: 1168; SSE2: # %bb.0: 1169; SSE2-NEXT: pxor %xmm1, %xmm1 1170; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1171; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1172; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1173; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1174; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1175; SSE2-NEXT: movaps %xmm2, %xmm0 1176; SSE2-NEXT: retq 1177; 1178; SSE41-LABEL: uitofp_16i8_to_4f64: 1179; SSE41: # %bb.0: 1180; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1181; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1182; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1183; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1184; SSE41-NEXT: retq 1185; 1186; VEX-LABEL: uitofp_16i8_to_4f64: 1187; VEX: # %bb.0: 1188; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1189; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 1190; VEX-NEXT: retq 1191; 1192; AVX512-LABEL: uitofp_16i8_to_4f64: 1193; AVX512: # %bb.0: 1194; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1195; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 1196; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1197; AVX512-NEXT: retq 1198 %cvt = uitofp <16 x i8> %a to <16 x double> 1199 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1200 ret <4 x double> %shuf 1201} 1202 1203; 1204; Signed Integer to Float 1205; 1206 1207define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { 1208; SSE2-LABEL: sitofp_2i64_to_4f32: 1209; SSE2: # %bb.0: 1210; SSE2-NEXT: movq %xmm0, %rax 1211; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1212; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1213; SSE2-NEXT: movq %xmm0, %rax 1214; SSE2-NEXT: xorps %xmm0, %xmm0 1215; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1216; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1217; SSE2-NEXT: movaps %xmm1, %xmm0 1218; SSE2-NEXT: retq 1219; 1220; SSE41-LABEL: sitofp_2i64_to_4f32: 1221; SSE41: # %bb.0: 1222; SSE41-NEXT: pextrq $1, %xmm0, %rax 1223; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1224; SSE41-NEXT: movq %xmm0, %rax 1225; SSE41-NEXT: xorps %xmm0, %xmm0 1226; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1227; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 1228; SSE41-NEXT: retq 1229; 1230; VEX-LABEL: sitofp_2i64_to_4f32: 1231; VEX: # %bb.0: 1232; VEX-NEXT: vpextrq $1, %xmm0, %rax 1233; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1234; VEX-NEXT: vmovq %xmm0, %rax 1235; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1236; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1237; VEX-NEXT: retq 1238; 1239; AVX512F-LABEL: sitofp_2i64_to_4f32: 1240; AVX512F: # %bb.0: 1241; AVX512F-NEXT: vpextrq 
$1, %xmm0, %rax 1242; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1243; AVX512F-NEXT: vmovq %xmm0, %rax 1244; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1245; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1246; AVX512F-NEXT: retq 1247; 1248; AVX512VL-LABEL: sitofp_2i64_to_4f32: 1249; AVX512VL: # %bb.0: 1250; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1251; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1252; AVX512VL-NEXT: vmovq %xmm0, %rax 1253; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1254; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1255; AVX512VL-NEXT: retq 1256; 1257; AVX512DQ-LABEL: sitofp_2i64_to_4f32: 1258; AVX512DQ: # %bb.0: 1259; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1260; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1261; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1262; AVX512DQ-NEXT: vzeroupper 1263; AVX512DQ-NEXT: retq 1264; 1265; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32: 1266; AVX512VLDQ: # %bb.0: 1267; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 1268; AVX512VLDQ-NEXT: retq 1269 %cvt = sitofp <2 x i64> %a to <2 x float> 1270 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1271 ret <4 x float> %ext 1272} 1273 1274define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { 1275; SSE2-LABEL: sitofp_2i64_to_4f32_zero: 1276; SSE2: # %bb.0: 1277; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1278; SSE2-NEXT: movq %xmm1, %rax 1279; SSE2-NEXT: xorps %xmm1, %xmm1 1280; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1281; SSE2-NEXT: movq %xmm0, %rax 1282; SSE2-NEXT: xorps %xmm0, %xmm0 1283; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1284; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1285; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1286; SSE2-NEXT: retq 1287; 1288; SSE41-LABEL: sitofp_2i64_to_4f32_zero: 1289; SSE41: # %bb.0: 1290; SSE41-NEXT: movq %xmm0, %rax 1291; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1292; SSE41-NEXT: pextrq $1, %xmm0, %rax 1293; 
SSE41-NEXT: xorps %xmm0, %xmm0 1294; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1295; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],zero,zero 1296; SSE41-NEXT: movaps %xmm1, %xmm0 1297; SSE41-NEXT: retq 1298; 1299; VEX-LABEL: sitofp_2i64_to_4f32_zero: 1300; VEX: # %bb.0: 1301; VEX-NEXT: vmovq %xmm0, %rax 1302; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1303; VEX-NEXT: vpextrq $1, %xmm0, %rax 1304; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1305; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 1306; VEX-NEXT: retq 1307; 1308; AVX512F-LABEL: sitofp_2i64_to_4f32_zero: 1309; AVX512F: # %bb.0: 1310; AVX512F-NEXT: vmovq %xmm0, %rax 1311; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1312; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1313; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1314; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 1315; AVX512F-NEXT: retq 1316; 1317; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero: 1318; AVX512VL: # %bb.0: 1319; AVX512VL-NEXT: vmovq %xmm0, %rax 1320; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1321; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1322; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1323; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 1324; AVX512VL-NEXT: retq 1325; 1326; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero: 1327; AVX512DQ: # %bb.0: 1328; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1329; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1330; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1331; AVX512DQ-NEXT: vzeroupper 1332; AVX512DQ-NEXT: retq 1333; 1334; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero: 1335; AVX512VLDQ: # %bb.0: 1336; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 1337; AVX512VLDQ-NEXT: retq 1338 %cvt = sitofp <2 x i64> %a to <2 x float> 1339 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1340 ret <4 x float> %ext 1341} 1342 1343define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { 1344; SSE2-LABEL: 
sitofp_4i64_to_4f32_undef: 1345; SSE2: # %bb.0: 1346; SSE2-NEXT: movq %xmm0, %rax 1347; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1348; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1349; SSE2-NEXT: movq %xmm0, %rax 1350; SSE2-NEXT: xorps %xmm0, %xmm0 1351; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1352; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1353; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 1354; SSE2-NEXT: retq 1355; 1356; SSE41-LABEL: sitofp_4i64_to_4f32_undef: 1357; SSE41: # %bb.0: 1358; SSE41-NEXT: pextrq $1, %xmm0, %rax 1359; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1360; SSE41-NEXT: movq %xmm0, %rax 1361; SSE41-NEXT: xorps %xmm0, %xmm0 1362; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1363; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1364; SSE41-NEXT: retq 1365; 1366; VEX-LABEL: sitofp_4i64_to_4f32_undef: 1367; VEX: # %bb.0: 1368; VEX-NEXT: vpextrq $1, %xmm0, %rax 1369; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1370; VEX-NEXT: vmovq %xmm0, %rax 1371; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1372; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1373; VEX-NEXT: retq 1374; 1375; AVX512F-LABEL: sitofp_4i64_to_4f32_undef: 1376; AVX512F: # %bb.0: 1377; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1378; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1379; AVX512F-NEXT: vmovq %xmm0, %rax 1380; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1381; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1382; AVX512F-NEXT: retq 1383; 1384; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef: 1385; AVX512VL: # %bb.0: 1386; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1387; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1388; AVX512VL-NEXT: vmovq %xmm0, %rax 1389; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1390; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1391; AVX512VL-NEXT: retq 1392; 1393; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef: 1394; AVX512DQ: # %bb.0: 1395; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1396; AVX512DQ-NEXT: vcvtqq2ps 
%zmm0, %ymm0 1397; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1398; AVX512DQ-NEXT: vzeroupper 1399; AVX512DQ-NEXT: retq 1400; 1401; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef: 1402; AVX512VLDQ: # %bb.0: 1403; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1404; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 1405; AVX512VLDQ-NEXT: vzeroupper 1406; AVX512VLDQ-NEXT: retq 1407 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1408 %cvt = sitofp <4 x i64> %ext to <4 x float> 1409 ret <4 x float> %cvt 1410} 1411 1412define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) { 1413; SSE-LABEL: sitofp_4i32_to_4f32: 1414; SSE: # %bb.0: 1415; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1416; SSE-NEXT: retq 1417; 1418; AVX-LABEL: sitofp_4i32_to_4f32: 1419; AVX: # %bb.0: 1420; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1421; AVX-NEXT: retq 1422 %cvt = sitofp <4 x i32> %a to <4 x float> 1423 ret <4 x float> %cvt 1424} 1425 1426define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) { 1427; SSE2-LABEL: sitofp_4i16_to_4f32: 1428; SSE2: # %bb.0: 1429; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1430; SSE2-NEXT: psrad $16, %xmm0 1431; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1432; SSE2-NEXT: retq 1433; 1434; SSE41-LABEL: sitofp_4i16_to_4f32: 1435; SSE41: # %bb.0: 1436; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 1437; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1438; SSE41-NEXT: retq 1439; 1440; AVX-LABEL: sitofp_4i16_to_4f32: 1441; AVX: # %bb.0: 1442; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 1443; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1444; AVX-NEXT: retq 1445 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1446 %cvt = sitofp <4 x i16> %shuf to <4 x float> 1447 ret <4 x float> %cvt 1448} 1449 1450define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) { 1451; SSE2-LABEL: sitofp_8i16_to_4f32: 1452; SSE2: # %bb.0: 1453; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1454; SSE2-NEXT: psrad $16, %xmm0 1455; 
SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1456; SSE2-NEXT: retq 1457; 1458; SSE41-LABEL: sitofp_8i16_to_4f32: 1459; SSE41: # %bb.0: 1460; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 1461; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1462; SSE41-NEXT: retq 1463; 1464; AVX1-LABEL: sitofp_8i16_to_4f32: 1465; AVX1: # %bb.0: 1466; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1467; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1468; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1469; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1470; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1471; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1472; AVX1-NEXT: vzeroupper 1473; AVX1-NEXT: retq 1474; 1475; AVX2-LABEL: sitofp_8i16_to_4f32: 1476; AVX2: # %bb.0: 1477; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 1478; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1479; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1480; AVX2-NEXT: vzeroupper 1481; AVX2-NEXT: retq 1482; 1483; AVX512-LABEL: sitofp_8i16_to_4f32: 1484; AVX512: # %bb.0: 1485; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 1486; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 1487; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1488; AVX512-NEXT: vzeroupper 1489; AVX512-NEXT: retq 1490 %cvt = sitofp <8 x i16> %a to <8 x float> 1491 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1492 ret <4 x float> %shuf 1493} 1494 1495define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) { 1496; SSE2-LABEL: sitofp_4i8_to_4f32: 1497; SSE2: # %bb.0: 1498; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1499; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1500; SSE2-NEXT: psrad $24, %xmm0 1501; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1502; SSE2-NEXT: retq 1503; 1504; SSE41-LABEL: sitofp_4i8_to_4f32: 1505; SSE41: # %bb.0: 1506; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1507; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1508; SSE41-NEXT: retq 1509; 1510; AVX-LABEL: sitofp_4i8_to_4f32: 1511; AVX: # %bb.0: 1512; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 1513; AVX-NEXT: 
vcvtdq2ps %xmm0, %xmm0 1514; AVX-NEXT: retq 1515 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1516 %cvt = sitofp <4 x i8> %shuf to <4 x float> 1517 ret <4 x float> %cvt 1518} 1519 1520define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) { 1521; SSE2-LABEL: sitofp_16i8_to_4f32: 1522; SSE2: # %bb.0: 1523; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1524; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1525; SSE2-NEXT: psrad $24, %xmm0 1526; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1527; SSE2-NEXT: retq 1528; 1529; SSE41-LABEL: sitofp_16i8_to_4f32: 1530; SSE41: # %bb.0: 1531; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1532; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1533; SSE41-NEXT: retq 1534; 1535; AVX1-LABEL: sitofp_16i8_to_4f32: 1536; AVX1: # %bb.0: 1537; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1538; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1539; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 1540; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1541; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1542; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1543; AVX1-NEXT: vzeroupper 1544; AVX1-NEXT: retq 1545; 1546; AVX2-LABEL: sitofp_16i8_to_4f32: 1547; AVX2: # %bb.0: 1548; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 1549; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1550; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1551; AVX2-NEXT: vzeroupper 1552; AVX2-NEXT: retq 1553; 1554; AVX512-LABEL: sitofp_16i8_to_4f32: 1555; AVX512: # %bb.0: 1556; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 1557; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 1558; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1559; AVX512-NEXT: vzeroupper 1560; AVX512-NEXT: retq 1561 %cvt = sitofp <16 x i8> %a to <16 x float> 1562 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1563 ret <4 x float> %shuf 1564} 1565 1566define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { 1567; SSE2-LABEL: sitofp_4i64_to_4f32: 1568; SSE2: # 
%bb.0: 1569; SSE2-NEXT: movq %xmm1, %rax 1570; SSE2-NEXT: cvtsi2ss %rax, %xmm2 1571; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1572; SSE2-NEXT: movq %xmm1, %rax 1573; SSE2-NEXT: xorps %xmm1, %xmm1 1574; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1575; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1576; SSE2-NEXT: movq %xmm0, %rax 1577; SSE2-NEXT: xorps %xmm1, %xmm1 1578; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1579; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1580; SSE2-NEXT: movq %xmm0, %rax 1581; SSE2-NEXT: xorps %xmm0, %xmm0 1582; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1583; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1584; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1585; SSE2-NEXT: movaps %xmm1, %xmm0 1586; SSE2-NEXT: retq 1587; 1588; SSE41-LABEL: sitofp_4i64_to_4f32: 1589; SSE41: # %bb.0: 1590; SSE41-NEXT: pextrq $1, %xmm0, %rax 1591; SSE41-NEXT: cvtsi2ss %rax, %xmm2 1592; SSE41-NEXT: movq %xmm0, %rax 1593; SSE41-NEXT: xorps %xmm0, %xmm0 1594; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1595; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 1596; SSE41-NEXT: movq %xmm1, %rax 1597; SSE41-NEXT: xorps %xmm2, %xmm2 1598; SSE41-NEXT: cvtsi2ss %rax, %xmm2 1599; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 1600; SSE41-NEXT: pextrq $1, %xmm1, %rax 1601; SSE41-NEXT: xorps %xmm1, %xmm1 1602; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1603; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 1604; SSE41-NEXT: retq 1605; 1606; AVX1-LABEL: sitofp_4i64_to_4f32: 1607; AVX1: # %bb.0: 1608; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1609; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1610; AVX1-NEXT: vmovq %xmm0, %rax 1611; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1612; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1613; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1614; AVX1-NEXT: vmovq %xmm0, %rax 1615; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1616; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1617; 
AVX1-NEXT: vpextrq $1, %xmm0, %rax 1618; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1619; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1620; AVX1-NEXT: vzeroupper 1621; AVX1-NEXT: retq 1622; 1623; AVX2-LABEL: sitofp_4i64_to_4f32: 1624; AVX2: # %bb.0: 1625; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1626; AVX2-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1627; AVX2-NEXT: vmovq %xmm0, %rax 1628; AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1629; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1630; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1631; AVX2-NEXT: vmovq %xmm0, %rax 1632; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1633; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1634; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1635; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1636; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1637; AVX2-NEXT: vzeroupper 1638; AVX2-NEXT: retq 1639; 1640; AVX512F-LABEL: sitofp_4i64_to_4f32: 1641; AVX512F: # %bb.0: 1642; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1643; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1644; AVX512F-NEXT: vmovq %xmm0, %rax 1645; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1646; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1647; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1648; AVX512F-NEXT: vmovq %xmm0, %rax 1649; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1650; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1651; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1652; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1653; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1654; AVX512F-NEXT: vzeroupper 1655; AVX512F-NEXT: retq 1656; 1657; AVX512VL-LABEL: sitofp_4i64_to_4f32: 1658; AVX512VL: # %bb.0: 1659; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1660; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1661; AVX512VL-NEXT: vmovq %xmm0, %rax 1662; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1663; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1664; AVX512VL-NEXT: 
vextracti128 $1, %ymm0, %xmm0 1665; AVX512VL-NEXT: vmovq %xmm0, %rax 1666; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1667; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1668; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1669; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1670; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1671; AVX512VL-NEXT: vzeroupper 1672; AVX512VL-NEXT: retq 1673; 1674; AVX512DQ-LABEL: sitofp_4i64_to_4f32: 1675; AVX512DQ: # %bb.0: 1676; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1677; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1678; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1679; AVX512DQ-NEXT: vzeroupper 1680; AVX512DQ-NEXT: retq 1681; 1682; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32: 1683; AVX512VLDQ: # %bb.0: 1684; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 1685; AVX512VLDQ-NEXT: vzeroupper 1686; AVX512VLDQ-NEXT: retq 1687 %cvt = sitofp <4 x i64> %a to <4 x float> 1688 ret <4 x float> %cvt 1689} 1690 1691define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) { 1692; SSE-LABEL: sitofp_8i32_to_8f32: 1693; SSE: # %bb.0: 1694; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1695; SSE-NEXT: cvtdq2ps %xmm1, %xmm1 1696; SSE-NEXT: retq 1697; 1698; AVX-LABEL: sitofp_8i32_to_8f32: 1699; AVX: # %bb.0: 1700; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 1701; AVX-NEXT: retq 1702 %cvt = sitofp <8 x i32> %a to <8 x float> 1703 ret <8 x float> %cvt 1704} 1705 1706define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) { 1707; SSE2-LABEL: sitofp_8i16_to_8f32: 1708; SSE2: # %bb.0: 1709; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1710; SSE2-NEXT: psrad $16, %xmm1 1711; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2 1712; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 1713; SSE2-NEXT: psrad $16, %xmm0 1714; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 1715; SSE2-NEXT: movaps %xmm2, %xmm0 1716; SSE2-NEXT: retq 1717; 1718; SSE41-LABEL: sitofp_8i16_to_8f32: 1719; SSE41: # %bb.0: 1720; SSE41-NEXT: pmovsxwd 
%xmm0, %xmm1 1721; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 1722; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1723; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 1724; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 1725; SSE41-NEXT: movaps %xmm2, %xmm0 1726; SSE41-NEXT: retq 1727; 1728; AVX1-LABEL: sitofp_8i16_to_8f32: 1729; AVX1: # %bb.0: 1730; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1731; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1732; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1733; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1734; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1735; AVX1-NEXT: retq 1736; 1737; AVX2-LABEL: sitofp_8i16_to_8f32: 1738; AVX2: # %bb.0: 1739; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 1740; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1741; AVX2-NEXT: retq 1742; 1743; AVX512-LABEL: sitofp_8i16_to_8f32: 1744; AVX512: # %bb.0: 1745; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 1746; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 1747; AVX512-NEXT: retq 1748 %cvt = sitofp <8 x i16> %a to <8 x float> 1749 ret <8 x float> %cvt 1750} 1751 1752define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) { 1753; SSE2-LABEL: sitofp_8i8_to_8f32: 1754; SSE2: # %bb.0: 1755; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1756; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1757; SSE2-NEXT: psrad $24, %xmm0 1758; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1759; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 1760; SSE2-NEXT: psrad $24, %xmm1 1761; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 1762; SSE2-NEXT: retq 1763; 1764; SSE41-LABEL: sitofp_8i8_to_8f32: 1765; SSE41: # %bb.0: 1766; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 1767; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 1768; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1769; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1770; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 1771; SSE41-NEXT: movaps %xmm2, %xmm0 1772; SSE41-NEXT: retq 1773; 1774; AVX1-LABEL: 
sitofp_8i8_to_8f32: 1775; AVX1: # %bb.0: 1776; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1777; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1778; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 1779; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1780; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1781; AVX1-NEXT: retq 1782; 1783; AVX2-LABEL: sitofp_8i8_to_8f32: 1784; AVX2: # %bb.0: 1785; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 1786; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1787; AVX2-NEXT: retq 1788; 1789; AVX512-LABEL: sitofp_8i8_to_8f32: 1790; AVX512: # %bb.0: 1791; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 1792; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 1793; AVX512-NEXT: retq 1794 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1795 %cvt = sitofp <8 x i8> %shuf to <8 x float> 1796 ret <8 x float> %cvt 1797} 1798 1799define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) { 1800; SSE2-LABEL: sitofp_16i8_to_8f32: 1801; SSE2: # %bb.0: 1802; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1803; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1804; SSE2-NEXT: psrad $24, %xmm0 1805; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1806; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 1807; SSE2-NEXT: psrad $24, %xmm1 1808; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 1809; SSE2-NEXT: retq 1810; 1811; SSE41-LABEL: sitofp_16i8_to_8f32: 1812; SSE41: # %bb.0: 1813; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 1814; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 1815; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1816; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1817; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 1818; SSE41-NEXT: movaps %xmm2, %xmm0 1819; SSE41-NEXT: retq 1820; 1821; AVX1-LABEL: sitofp_16i8_to_8f32: 1822; AVX1: # %bb.0: 1823; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1824; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1825; AVX1-NEXT: 
vpmovsxbd %xmm0, %xmm0 1826; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1827; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1828; AVX1-NEXT: retq 1829; 1830; AVX2-LABEL: sitofp_16i8_to_8f32: 1831; AVX2: # %bb.0: 1832; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 1833; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1834; AVX2-NEXT: retq 1835; 1836; AVX512-LABEL: sitofp_16i8_to_8f32: 1837; AVX512: # %bb.0: 1838; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 1839; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 1840; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1841; AVX512-NEXT: retq 1842 %cvt = sitofp <16 x i8> %a to <16 x float> 1843 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1844 ret <8 x float> %shuf 1845} 1846 1847; 1848; Unsigned Integer to Float 1849; 1850 1851define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { 1852; SSE2-LABEL: uitofp_2i64_to_4f32: 1853; SSE2: # %bb.0: 1854; SSE2-NEXT: movdqa %xmm0, %xmm1 1855; SSE2-NEXT: movq %xmm0, %rax 1856; SSE2-NEXT: testq %rax, %rax 1857; SSE2-NEXT: js .LBB41_1 1858; SSE2-NEXT: # %bb.2: 1859; SSE2-NEXT: xorps %xmm0, %xmm0 1860; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1861; SSE2-NEXT: jmp .LBB41_3 1862; SSE2-NEXT: .LBB41_1: 1863; SSE2-NEXT: movq %rax, %rcx 1864; SSE2-NEXT: shrq %rcx 1865; SSE2-NEXT: andl $1, %eax 1866; SSE2-NEXT: orq %rcx, %rax 1867; SSE2-NEXT: xorps %xmm0, %xmm0 1868; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1869; SSE2-NEXT: addss %xmm0, %xmm0 1870; SSE2-NEXT: .LBB41_3: 1871; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1872; SSE2-NEXT: movq %xmm1, %rax 1873; SSE2-NEXT: testq %rax, %rax 1874; SSE2-NEXT: js .LBB41_4 1875; SSE2-NEXT: # %bb.5: 1876; SSE2-NEXT: xorps %xmm1, %xmm1 1877; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1878; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1879; SSE2-NEXT: retq 1880; SSE2-NEXT: .LBB41_4: 1881; SSE2-NEXT: movq %rax, %rcx 1882; SSE2-NEXT: shrq %rcx 1883; SSE2-NEXT: andl $1, %eax 1884; SSE2-NEXT: orq %rcx, %rax 1885; SSE2-NEXT: 
xorps %xmm1, %xmm1 1886; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1887; SSE2-NEXT: addss %xmm1, %xmm1 1888; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1889; SSE2-NEXT: retq 1890; 1891; SSE41-LABEL: uitofp_2i64_to_4f32: 1892; SSE41: # %bb.0: 1893; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1] 1894; SSE41-NEXT: pand %xmm0, %xmm1 1895; SSE41-NEXT: movdqa %xmm0, %xmm2 1896; SSE41-NEXT: pxor %xmm3, %xmm3 1897; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 1898; SSE41-NEXT: movdqa %xmm0, %xmm4 1899; SSE41-NEXT: psrlq $1, %xmm4 1900; SSE41-NEXT: por %xmm1, %xmm4 1901; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1902; SSE41-NEXT: pextrq $1, %xmm2, %rax 1903; SSE41-NEXT: xorps %xmm0, %xmm0 1904; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1905; SSE41-NEXT: movq %xmm2, %rax 1906; SSE41-NEXT: xorps %xmm1, %xmm1 1907; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1908; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],zero,zero 1909; SSE41-NEXT: movaps %xmm1, %xmm2 1910; SSE41-NEXT: addps %xmm1, %xmm2 1911; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 1912; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 1913; SSE41-NEXT: movaps %xmm1, %xmm0 1914; SSE41-NEXT: retq 1915; 1916; VEX-LABEL: uitofp_2i64_to_4f32: 1917; VEX: # %bb.0: 1918; VEX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 1919; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 1920; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 1921; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 1922; VEX-NEXT: vpextrq $1, %xmm1, %rax 1923; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1924; VEX-NEXT: vmovq %xmm1, %rax 1925; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 1926; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 1927; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 1928; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 1929; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 1930; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 1931; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 1932; VEX-NEXT: retq 1933; 1934; AVX512F-LABEL: uitofp_2i64_to_4f32: 1935; AVX512F: # %bb.0: 1936; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 
1937; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 1938; AVX512F-NEXT: vmovq %xmm0, %rax 1939; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 1940; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1941; AVX512F-NEXT: retq 1942; 1943; AVX512VL-LABEL: uitofp_2i64_to_4f32: 1944; AVX512VL: # %bb.0: 1945; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1946; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 1947; AVX512VL-NEXT: vmovq %xmm0, %rax 1948; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 1949; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1950; AVX512VL-NEXT: retq 1951; 1952; AVX512DQ-LABEL: uitofp_2i64_to_4f32: 1953; AVX512DQ: # %bb.0: 1954; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1955; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1956; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1957; AVX512DQ-NEXT: vzeroupper 1958; AVX512DQ-NEXT: retq 1959; 1960; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32: 1961; AVX512VLDQ: # %bb.0: 1962; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0 1963; AVX512VLDQ-NEXT: retq 1964 %cvt = uitofp <2 x i64> %a to <2 x float> 1965 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1966 ret <4 x float> %ext 1967} 1968 1969define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { 1970; SSE2-LABEL: uitofp_2i64_to_2f32: 1971; SSE2: # %bb.0: 1972; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1973; SSE2-NEXT: movq %xmm1, %rax 1974; SSE2-NEXT: testq %rax, %rax 1975; SSE2-NEXT: js .LBB42_1 1976; SSE2-NEXT: # %bb.2: 1977; SSE2-NEXT: xorps %xmm1, %xmm1 1978; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1979; SSE2-NEXT: jmp .LBB42_3 1980; SSE2-NEXT: .LBB42_1: 1981; SSE2-NEXT: movq %rax, %rcx 1982; SSE2-NEXT: shrq %rcx 1983; SSE2-NEXT: andl $1, %eax 1984; SSE2-NEXT: orq %rcx, %rax 1985; SSE2-NEXT: xorps %xmm1, %xmm1 1986; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1987; SSE2-NEXT: addss %xmm1, %xmm1 1988; SSE2-NEXT: .LBB42_3: 1989; SSE2-NEXT: movq %xmm0, %rax 1990; SSE2-NEXT: testq %rax, 
%rax 1991; SSE2-NEXT: js .LBB42_4 1992; SSE2-NEXT: # %bb.5: 1993; SSE2-NEXT: xorps %xmm0, %xmm0 1994; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1995; SSE2-NEXT: jmp .LBB42_6 1996; SSE2-NEXT: .LBB42_4: 1997; SSE2-NEXT: movq %rax, %rcx 1998; SSE2-NEXT: shrq %rcx 1999; SSE2-NEXT: andl $1, %eax 2000; SSE2-NEXT: orq %rcx, %rax 2001; SSE2-NEXT: xorps %xmm0, %xmm0 2002; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2003; SSE2-NEXT: addss %xmm0, %xmm0 2004; SSE2-NEXT: .LBB42_6: 2005; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2006; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2007; SSE2-NEXT: retq 2008; 2009; SSE41-LABEL: uitofp_2i64_to_2f32: 2010; SSE41: # %bb.0: 2011; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1] 2012; SSE41-NEXT: pand %xmm0, %xmm1 2013; SSE41-NEXT: movdqa %xmm0, %xmm2 2014; SSE41-NEXT: pxor %xmm3, %xmm3 2015; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 2016; SSE41-NEXT: movdqa %xmm0, %xmm4 2017; SSE41-NEXT: psrlq $1, %xmm4 2018; SSE41-NEXT: por %xmm1, %xmm4 2019; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 2020; SSE41-NEXT: pextrq $1, %xmm2, %rax 2021; SSE41-NEXT: xorps %xmm0, %xmm0 2022; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2023; SSE41-NEXT: movq %xmm2, %rax 2024; SSE41-NEXT: xorps %xmm1, %xmm1 2025; SSE41-NEXT: cvtsi2ss %rax, %xmm1 2026; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],zero,zero 2027; SSE41-NEXT: movaps %xmm1, %xmm2 2028; SSE41-NEXT: addps %xmm1, %xmm2 2029; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 2030; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 2031; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2032; SSE41-NEXT: retq 2033; 2034; VEX-LABEL: uitofp_2i64_to_2f32: 2035; VEX: # %bb.0: 2036; VEX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 2037; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 2038; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 2039; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 2040; VEX-NEXT: vpextrq $1, %xmm1, %rax 2041; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2042; VEX-NEXT: vmovq %xmm1, %rax 2043; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 2044; VEX-NEXT: 
vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 2045; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 2046; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 2047; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 2048; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 2049; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2050; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2051; VEX-NEXT: retq 2052; 2053; AVX512F-LABEL: uitofp_2i64_to_2f32: 2054; AVX512F: # %bb.0: 2055; AVX512F-NEXT: vmovq %xmm0, %rax 2056; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2057; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2058; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2059; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 2060; AVX512F-NEXT: retq 2061; 2062; AVX512VL-LABEL: uitofp_2i64_to_2f32: 2063; AVX512VL: # %bb.0: 2064; AVX512VL-NEXT: vmovq %xmm0, %rax 2065; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2066; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2067; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2068; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 2069; AVX512VL-NEXT: retq 2070; 2071; AVX512DQ-LABEL: uitofp_2i64_to_2f32: 2072; AVX512DQ: # %bb.0: 2073; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2074; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 2075; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2076; AVX512DQ-NEXT: vzeroupper 2077; AVX512DQ-NEXT: retq 2078; 2079; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32: 2080; AVX512VLDQ: # %bb.0: 2081; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0 2082; AVX512VLDQ-NEXT: retq 2083 %cvt = uitofp <2 x i64> %a to <2 x float> 2084 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2085 ret <4 x float> %ext 2086} 2087 2088define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { 2089; SSE2-LABEL: uitofp_4i64_to_4f32_undef: 2090; SSE2: # %bb.0: 2091; SSE2-NEXT: movq %xmm0, %rax 2092; SSE2-NEXT: testq %rax, %rax 2093; SSE2-NEXT: js .LBB43_1 2094; SSE2-NEXT: # %bb.2: 2095; SSE2-NEXT: cvtsi2ss 
%rax, %xmm1 2096; SSE2-NEXT: jmp .LBB43_3 2097; SSE2-NEXT: .LBB43_1: 2098; SSE2-NEXT: movq %rax, %rcx 2099; SSE2-NEXT: shrq %rcx 2100; SSE2-NEXT: andl $1, %eax 2101; SSE2-NEXT: orq %rcx, %rax 2102; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2103; SSE2-NEXT: addss %xmm1, %xmm1 2104; SSE2-NEXT: .LBB43_3: 2105; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2106; SSE2-NEXT: movq %xmm0, %rax 2107; SSE2-NEXT: testq %rax, %rax 2108; SSE2-NEXT: js .LBB43_4 2109; SSE2-NEXT: # %bb.5: 2110; SSE2-NEXT: xorps %xmm0, %xmm0 2111; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2112; SSE2-NEXT: jmp .LBB43_6 2113; SSE2-NEXT: .LBB43_4: 2114; SSE2-NEXT: movq %rax, %rcx 2115; SSE2-NEXT: shrq %rcx 2116; SSE2-NEXT: andl $1, %eax 2117; SSE2-NEXT: orq %rcx, %rax 2118; SSE2-NEXT: xorps %xmm0, %xmm0 2119; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2120; SSE2-NEXT: addss %xmm0, %xmm0 2121; SSE2-NEXT: .LBB43_6: 2122; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2123; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2124; SSE2-NEXT: retq 2125; 2126; SSE41-LABEL: uitofp_4i64_to_4f32_undef: 2127; SSE41: # %bb.0: 2128; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1] 2129; SSE41-NEXT: pand %xmm0, %xmm1 2130; SSE41-NEXT: movdqa %xmm0, %xmm2 2131; SSE41-NEXT: psrlq $1, %xmm2 2132; SSE41-NEXT: por %xmm1, %xmm2 2133; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] 2134; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0 2135; SSE41-NEXT: pextrq $1, %xmm0, %rax 2136; SSE41-NEXT: cvtsi2ss %rax, %xmm3 2137; SSE41-NEXT: movq %xmm0, %rax 2138; SSE41-NEXT: xorps %xmm2, %xmm2 2139; SSE41-NEXT: cvtsi2ss %rax, %xmm2 2140; SSE41-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],zero,zero 2141; SSE41-NEXT: movaps %xmm2, %xmm3 2142; SSE41-NEXT: addps %xmm2, %xmm3 2143; SSE41-NEXT: movdqa %xmm1, %xmm0 2144; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2 2145; SSE41-NEXT: movaps %xmm2, %xmm0 2146; SSE41-NEXT: retq 2147; 2148; AVX1-LABEL: uitofp_4i64_to_4f32_undef: 2149; AVX1: # %bb.0: 2150; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2151; 
AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm1 2152; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 2153; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1 2154; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2155; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2156; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2157; AVX1-NEXT: vmovq %xmm1, %rax 2158; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 2159; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2160; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2161; AVX1-NEXT: vmovq %xmm1, %rax 2162; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 2163; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 2164; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2165; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 2166; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 2167; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 2168; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2169; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2170; AVX1-NEXT: vzeroupper 2171; AVX1-NEXT: retq 2172; 2173; AVX2-LABEL: uitofp_4i64_to_4f32_undef: 2174; AVX2: # %bb.0: 2175; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2176; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 2177; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1 2178; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2 2179; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 2180; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2181; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2182; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2183; AVX2-NEXT: vmovq %xmm1, %rax 2184; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 2185; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2186; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 2187; AVX2-NEXT: vmovq %xmm1, %rax 2188; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 2189; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 2190; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2191; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 2192; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 2193; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 2194; AVX2-NEXT: vpackssdw %xmm0, %xmm0, 
%xmm0 2195; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2196; AVX2-NEXT: vzeroupper 2197; AVX2-NEXT: retq 2198; 2199; AVX512F-LABEL: uitofp_4i64_to_4f32_undef: 2200; AVX512F: # %bb.0: 2201; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2202; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2203; AVX512F-NEXT: vmovq %xmm0, %rax 2204; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2205; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 2206; AVX512F-NEXT: retq 2207; 2208; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef: 2209; AVX512VL: # %bb.0: 2210; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2211; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2212; AVX512VL-NEXT: vmovq %xmm0, %rax 2213; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2214; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 2215; AVX512VL-NEXT: retq 2216; 2217; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: 2218; AVX512DQ: # %bb.0: 2219; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2220; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 2221; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2222; AVX512DQ-NEXT: vzeroupper 2223; AVX512DQ-NEXT: retq 2224; 2225; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef: 2226; AVX512VLDQ: # %bb.0: 2227; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2228; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 2229; AVX512VLDQ-NEXT: vzeroupper 2230; AVX512VLDQ-NEXT: retq 2231 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2232 %cvt = uitofp <4 x i64> %ext to <4 x float> 2233 ret <4 x float> %cvt 2234} 2235 2236define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) { 2237; SSE2-LABEL: uitofp_4i32_to_4f32: 2238; SSE2: # %bb.0: 2239; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 2240; SSE2-NEXT: pand %xmm0, %xmm1 2241; SSE2-NEXT: por {{.*}}(%rip), %xmm1 2242; SSE2-NEXT: psrld $16, %xmm0 2243; SSE2-NEXT: por {{.*}}(%rip), %xmm0 2244; SSE2-NEXT: subps {{.*}}(%rip), %xmm0 2245; SSE2-NEXT: addps %xmm1, 
%xmm0 2246; SSE2-NEXT: retq 2247; 2248; SSE41-LABEL: uitofp_4i32_to_4f32: 2249; SSE41: # %bb.0: 2250; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 2251; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 2252; SSE41-NEXT: psrld $16, %xmm0 2253; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 2254; SSE41-NEXT: subps {{.*}}(%rip), %xmm0 2255; SSE41-NEXT: addps %xmm1, %xmm0 2256; SSE41-NEXT: retq 2257; 2258; AVX1-LABEL: uitofp_4i32_to_4f32: 2259; AVX1: # %bb.0: 2260; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 2261; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 2262; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 2263; AVX1-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 2264; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 2265; AVX1-NEXT: retq 2266; 2267; AVX2-LABEL: uitofp_4i32_to_4f32: 2268; AVX2: # %bb.0: 2269; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 2270; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 2271; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 2272; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928] 2273; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 2274; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2275; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0 2276; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 2277; AVX2-NEXT: retq 2278; 2279; AVX512F-LABEL: uitofp_4i32_to_4f32: 2280; AVX512F: # %bb.0: 2281; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2282; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 2283; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2284; AVX512F-NEXT: vzeroupper 2285; AVX512F-NEXT: retq 2286; 2287; 
AVX512VL-LABEL: uitofp_4i32_to_4f32: 2288; AVX512VL: # %bb.0: 2289; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0 2290; AVX512VL-NEXT: retq 2291; 2292; AVX512DQ-LABEL: uitofp_4i32_to_4f32: 2293; AVX512DQ: # %bb.0: 2294; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2295; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 2296; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2297; AVX512DQ-NEXT: vzeroupper 2298; AVX512DQ-NEXT: retq 2299; 2300; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32: 2301; AVX512VLDQ: # %bb.0: 2302; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0 2303; AVX512VLDQ-NEXT: retq 2304 %cvt = uitofp <4 x i32> %a to <4 x float> 2305 ret <4 x float> %cvt 2306} 2307 2308define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) { 2309; SSE2-LABEL: uitofp_4i16_to_4f32: 2310; SSE2: # %bb.0: 2311; SSE2-NEXT: pxor %xmm1, %xmm1 2312; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2313; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2314; SSE2-NEXT: retq 2315; 2316; SSE41-LABEL: uitofp_4i16_to_4f32: 2317; SSE41: # %bb.0: 2318; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2319; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2320; SSE41-NEXT: retq 2321; 2322; AVX-LABEL: uitofp_4i16_to_4f32: 2323; AVX: # %bb.0: 2324; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2325; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 2326; AVX-NEXT: retq 2327 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2328 %cvt = uitofp <4 x i16> %shuf to <4 x float> 2329 ret <4 x float> %cvt 2330} 2331 2332define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) { 2333; SSE2-LABEL: uitofp_8i16_to_4f32: 2334; SSE2: # %bb.0: 2335; SSE2-NEXT: pxor %xmm1, %xmm1 2336; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2337; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2338; SSE2-NEXT: retq 2339; 2340; SSE41-LABEL: uitofp_8i16_to_4f32: 2341; 
SSE41: # %bb.0: 2342; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2343; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2344; SSE41-NEXT: retq 2345; 2346; AVX1-LABEL: uitofp_8i16_to_4f32: 2347; AVX1: # %bb.0: 2348; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2349; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2350; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2351; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2352; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2353; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2354; AVX1-NEXT: vzeroupper 2355; AVX1-NEXT: retq 2356; 2357; AVX2-LABEL: uitofp_8i16_to_4f32: 2358; AVX2: # %bb.0: 2359; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2360; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2361; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2362; AVX2-NEXT: vzeroupper 2363; AVX2-NEXT: retq 2364; 2365; AVX512-LABEL: uitofp_8i16_to_4f32: 2366; AVX512: # %bb.0: 2367; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2368; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 2369; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2370; AVX512-NEXT: vzeroupper 2371; AVX512-NEXT: retq 2372 %cvt = uitofp <8 x i16> %a to <8 x float> 2373 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2374 ret <4 x float> %shuf 2375} 2376 2377define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) { 2378; SSE2-LABEL: uitofp_4i8_to_4f32: 2379; SSE2: # %bb.0: 2380; SSE2-NEXT: pxor %xmm1, %xmm1 2381; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2382; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2383; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2384; SSE2-NEXT: retq 2385; 2386; SSE41-LABEL: uitofp_4i8_to_4f32: 2387; SSE41: # %bb.0: 2388; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2389; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2390; SSE41-NEXT: retq 2391; 2392; AVX-LABEL: uitofp_4i8_to_4f32: 2393; AVX: # %bb.0: 2394; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2395; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 2396; AVX-NEXT: retq 2397 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2398 %cvt = uitofp <4 x i8> %shuf to <4 x float> 2399 ret <4 x float> %cvt 2400} 2401 2402define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) { 2403; SSE2-LABEL: uitofp_16i8_to_4f32: 2404; SSE2: # %bb.0: 2405; SSE2-NEXT: pxor %xmm1, %xmm1 2406; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2407; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2408; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2409; SSE2-NEXT: retq 2410; 2411; SSE41-LABEL: uitofp_16i8_to_4f32: 2412; SSE41: # %bb.0: 2413; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2414; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2415; SSE41-NEXT: retq 2416; 2417; AVX1-LABEL: uitofp_16i8_to_4f32: 2418; AVX1: # %bb.0: 2419; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2420; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2421; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2422; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm1, %ymm0 2423; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2424; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2425; AVX1-NEXT: vzeroupper 2426; AVX1-NEXT: retq 2427; 2428; AVX2-LABEL: uitofp_16i8_to_4f32: 2429; AVX2: # %bb.0: 2430; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2431; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2432; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2433; AVX2-NEXT: vzeroupper 2434; AVX2-NEXT: retq 2435; 2436; AVX512-LABEL: uitofp_16i8_to_4f32: 2437; AVX512: # %bb.0: 2438; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2439; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 2440; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2441; AVX512-NEXT: vzeroupper 2442; AVX512-NEXT: retq 2443 %cvt = uitofp <16 x i8> %a to <16 x float> 2444 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2445 ret <4 x float> %shuf 2446} 2447 2448define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { 2449; SSE2-LABEL: uitofp_4i64_to_4f32: 2450; SSE2: # %bb.0: 2451; SSE2-NEXT: movq %xmm1, %rax 2452; SSE2-NEXT: testq %rax, %rax 2453; SSE2-NEXT: js .LBB49_1 2454; SSE2-NEXT: # %bb.2: 2455; SSE2-NEXT: cvtsi2ss %rax, %xmm2 2456; SSE2-NEXT: jmp .LBB49_3 2457; SSE2-NEXT: .LBB49_1: 2458; SSE2-NEXT: movq %rax, %rcx 2459; SSE2-NEXT: shrq %rcx 2460; SSE2-NEXT: andl $1, %eax 2461; SSE2-NEXT: orq %rcx, %rax 2462; SSE2-NEXT: cvtsi2ss %rax, %xmm2 2463; SSE2-NEXT: addss %xmm2, 
%xmm2 2464; SSE2-NEXT: .LBB49_3: 2465; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 2466; SSE2-NEXT: movq %xmm1, %rax 2467; SSE2-NEXT: testq %rax, %rax 2468; SSE2-NEXT: js .LBB49_4 2469; SSE2-NEXT: # %bb.5: 2470; SSE2-NEXT: cvtsi2ss %rax, %xmm3 2471; SSE2-NEXT: jmp .LBB49_6 2472; SSE2-NEXT: .LBB49_4: 2473; SSE2-NEXT: movq %rax, %rcx 2474; SSE2-NEXT: shrq %rcx 2475; SSE2-NEXT: andl $1, %eax 2476; SSE2-NEXT: orq %rcx, %rax 2477; SSE2-NEXT: cvtsi2ss %rax, %xmm3 2478; SSE2-NEXT: addss %xmm3, %xmm3 2479; SSE2-NEXT: .LBB49_6: 2480; SSE2-NEXT: movq %xmm0, %rax 2481; SSE2-NEXT: testq %rax, %rax 2482; SSE2-NEXT: js .LBB49_7 2483; SSE2-NEXT: # %bb.8: 2484; SSE2-NEXT: xorps %xmm1, %xmm1 2485; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2486; SSE2-NEXT: jmp .LBB49_9 2487; SSE2-NEXT: .LBB49_7: 2488; SSE2-NEXT: movq %rax, %rcx 2489; SSE2-NEXT: shrq %rcx 2490; SSE2-NEXT: andl $1, %eax 2491; SSE2-NEXT: orq %rcx, %rax 2492; SSE2-NEXT: xorps %xmm1, %xmm1 2493; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2494; SSE2-NEXT: addss %xmm1, %xmm1 2495; SSE2-NEXT: .LBB49_9: 2496; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2497; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2498; SSE2-NEXT: movq %xmm0, %rax 2499; SSE2-NEXT: testq %rax, %rax 2500; SSE2-NEXT: js .LBB49_10 2501; SSE2-NEXT: # %bb.11: 2502; SSE2-NEXT: xorps %xmm0, %xmm0 2503; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2504; SSE2-NEXT: jmp .LBB49_12 2505; SSE2-NEXT: .LBB49_10: 2506; SSE2-NEXT: movq %rax, %rcx 2507; SSE2-NEXT: shrq %rcx 2508; SSE2-NEXT: andl $1, %eax 2509; SSE2-NEXT: orq %rcx, %rax 2510; SSE2-NEXT: xorps %xmm0, %xmm0 2511; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2512; SSE2-NEXT: addss %xmm0, %xmm0 2513; SSE2-NEXT: .LBB49_12: 2514; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2515; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2516; SSE2-NEXT: movaps %xmm1, %xmm0 2517; SSE2-NEXT: retq 2518; 2519; SSE41-LABEL: uitofp_4i64_to_4f32: 2520; SSE41: # %bb.0: 2521; SSE41-NEXT: movdqa %xmm1, %xmm2 2522; 
SSE41-NEXT: movdqa %xmm0, %xmm1 2523; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1,1] 2524; SSE41-NEXT: pand %xmm4, %xmm0 2525; SSE41-NEXT: movdqa %xmm1, %xmm3 2526; SSE41-NEXT: psrlq $1, %xmm3 2527; SSE41-NEXT: por %xmm0, %xmm3 2528; SSE41-NEXT: movdqa %xmm1, %xmm5 2529; SSE41-NEXT: movdqa %xmm1, %xmm0 2530; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5 2531; SSE41-NEXT: pextrq $1, %xmm5, %rax 2532; SSE41-NEXT: xorps %xmm0, %xmm0 2533; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2534; SSE41-NEXT: movq %xmm5, %rax 2535; SSE41-NEXT: xorps %xmm3, %xmm3 2536; SSE41-NEXT: cvtsi2ss %rax, %xmm3 2537; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3] 2538; SSE41-NEXT: pand %xmm2, %xmm4 2539; SSE41-NEXT: movdqa %xmm2, %xmm5 2540; SSE41-NEXT: psrlq $1, %xmm5 2541; SSE41-NEXT: por %xmm4, %xmm5 2542; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 2543; SSE41-NEXT: movaps %xmm2, %xmm0 2544; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 2545; SSE41-NEXT: movq %xmm2, %rax 2546; SSE41-NEXT: xorps %xmm0, %xmm0 2547; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2548; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3] 2549; SSE41-NEXT: pextrq $1, %xmm2, %rax 2550; SSE41-NEXT: xorps %xmm0, %xmm0 2551; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2552; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0] 2553; SSE41-NEXT: movaps %xmm3, %xmm2 2554; SSE41-NEXT: addps %xmm3, %xmm2 2555; SSE41-NEXT: movaps %xmm1, %xmm0 2556; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3 2557; SSE41-NEXT: movaps %xmm3, %xmm0 2558; SSE41-NEXT: retq 2559; 2560; AVX1-LABEL: uitofp_4i64_to_4f32: 2561; AVX1: # %bb.0: 2562; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1 2563; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2564; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3 2565; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2566; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3 2567; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1 2568; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2569; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2570; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 
2571; AVX1-NEXT: vmovq %xmm1, %rax 2572; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 2573; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 2574; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2575; AVX1-NEXT: vmovq %xmm1, %rax 2576; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 2577; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 2578; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2579; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 2580; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 2581; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3 2582; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2583; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0 2584; AVX1-NEXT: vzeroupper 2585; AVX1-NEXT: retq 2586; 2587; AVX2-LABEL: uitofp_4i64_to_4f32: 2588; AVX2: # %bb.0: 2589; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 2590; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1 2591; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2 2592; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 2593; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2594; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2595; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2596; AVX2-NEXT: vmovq %xmm1, %rax 2597; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 2598; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2599; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 2600; AVX2-NEXT: vmovq %xmm1, %rax 2601; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 2602; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 2603; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2604; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 2605; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 2606; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 2607; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 2608; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 2609; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2610; AVX2-NEXT: vzeroupper 2611; AVX2-NEXT: retq 2612; 2613; AVX512F-LABEL: uitofp_4i64_to_4f32: 2614; AVX512F: # %bb.0: 2615; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2616; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2617; 
AVX512F-NEXT: vmovq %xmm0, %rax 2618; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 2619; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 2620; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 2621; AVX512F-NEXT: vmovq %xmm0, %rax 2622; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 2623; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 2624; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2625; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 2626; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 2627; AVX512F-NEXT: vzeroupper 2628; AVX512F-NEXT: retq 2629; 2630; AVX512VL-LABEL: uitofp_4i64_to_4f32: 2631; AVX512VL: # %bb.0: 2632; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2633; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2634; AVX512VL-NEXT: vmovq %xmm0, %rax 2635; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 2636; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 2637; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 2638; AVX512VL-NEXT: vmovq %xmm0, %rax 2639; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 2640; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 2641; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2642; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 2643; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 2644; AVX512VL-NEXT: vzeroupper 2645; AVX512VL-NEXT: retq 2646; 2647; AVX512DQ-LABEL: uitofp_4i64_to_4f32: 2648; AVX512DQ: # %bb.0: 2649; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2650; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 2651; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2652; AVX512DQ-NEXT: vzeroupper 2653; AVX512DQ-NEXT: retq 2654; 2655; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32: 2656; AVX512VLDQ: # %bb.0: 2657; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 2658; AVX512VLDQ-NEXT: vzeroupper 2659; AVX512VLDQ-NEXT: retq 2660 %cvt = uitofp <4 x i64> %a to <4 x float> 2661 ret <4 x float> %cvt 2662} 2663 2664define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) { 2665; 
SSE2-LABEL: uitofp_8i32_to_8f32: 2666; SSE2: # %bb.0: 2667; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] 2668; SSE2-NEXT: movdqa %xmm0, %xmm3 2669; SSE2-NEXT: pand %xmm2, %xmm3 2670; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200] 2671; SSE2-NEXT: por %xmm4, %xmm3 2672; SSE2-NEXT: psrld $16, %xmm0 2673; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928] 2674; SSE2-NEXT: por %xmm5, %xmm0 2675; SSE2-NEXT: movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2676; SSE2-NEXT: subps %xmm6, %xmm0 2677; SSE2-NEXT: addps %xmm3, %xmm0 2678; SSE2-NEXT: pand %xmm1, %xmm2 2679; SSE2-NEXT: por %xmm4, %xmm2 2680; SSE2-NEXT: psrld $16, %xmm1 2681; SSE2-NEXT: por %xmm5, %xmm1 2682; SSE2-NEXT: subps %xmm6, %xmm1 2683; SSE2-NEXT: addps %xmm2, %xmm1 2684; SSE2-NEXT: retq 2685; 2686; SSE41-LABEL: uitofp_8i32_to_8f32: 2687; SSE41: # %bb.0: 2688; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] 2689; SSE41-NEXT: movdqa %xmm0, %xmm3 2690; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 2691; SSE41-NEXT: psrld $16, %xmm0 2692; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928] 2693; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] 2694; SSE41-NEXT: movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2695; SSE41-NEXT: subps %xmm5, %xmm0 2696; SSE41-NEXT: addps %xmm3, %xmm0 2697; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 2698; SSE41-NEXT: psrld $16, %xmm1 2699; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] 2700; SSE41-NEXT: subps %xmm5, %xmm1 2701; SSE41-NEXT: addps %xmm2, %xmm1 2702; SSE41-NEXT: retq 2703; 2704; AVX1-LABEL: uitofp_8i32_to_8f32: 2705; AVX1: # %bb.0: 2706; 
AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 2707; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2708; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 2709; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 2710; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 2711; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 2712; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 2713; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2714; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 2715; AVX1-NEXT: retq 2716; 2717; AVX2-LABEL: uitofp_8i32_to_8f32: 2718; AVX2: # %bb.0: 2719; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] 2720; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2721; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 2722; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] 2723; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] 2724; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2725; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 2726; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 2727; AVX2-NEXT: retq 2728; 2729; AVX512F-LABEL: uitofp_8i32_to_8f32: 2730; AVX512F: # %bb.0: 2731; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2732; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 2733; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2734; AVX512F-NEXT: retq 2735; 2736; AVX512VL-LABEL: uitofp_8i32_to_8f32: 2737; AVX512VL: # %bb.0: 2738; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0 2739; AVX512VL-NEXT: retq 2740; 2741; AVX512DQ-LABEL: uitofp_8i32_to_8f32: 2742; AVX512DQ: # %bb.0: 2743; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2744; 
AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 2745; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2746; AVX512DQ-NEXT: retq 2747; 2748; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32: 2749; AVX512VLDQ: # %bb.0: 2750; AVX512VLDQ-NEXT: vcvtudq2ps %ymm0, %ymm0 2751; AVX512VLDQ-NEXT: retq 2752 %cvt = uitofp <8 x i32> %a to <8 x float> 2753 ret <8 x float> %cvt 2754} 2755 2756define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) { 2757; SSE2-LABEL: uitofp_8i16_to_8f32: 2758; SSE2: # %bb.0: 2759; SSE2-NEXT: pxor %xmm1, %xmm1 2760; SSE2-NEXT: movdqa %xmm0, %xmm2 2761; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2762; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 2763; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2764; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 2765; SSE2-NEXT: movaps %xmm2, %xmm0 2766; SSE2-NEXT: retq 2767; 2768; SSE41-LABEL: uitofp_8i16_to_8f32: 2769; SSE41: # %bb.0: 2770; SSE41-NEXT: pxor %xmm1, %xmm1 2771; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2772; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2773; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 2774; SSE41-NEXT: cvtdq2ps %xmm2, %xmm0 2775; SSE41-NEXT: retq 2776; 2777; AVX1-LABEL: uitofp_8i16_to_8f32: 2778; AVX1: # %bb.0: 2779; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2780; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2781; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2782; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2783; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2784; AVX1-NEXT: retq 2785; 2786; AVX2-LABEL: uitofp_8i16_to_8f32: 2787; AVX2: # %bb.0: 2788; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2789; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 
2790; AVX2-NEXT: retq 2791; 2792; AVX512-LABEL: uitofp_8i16_to_8f32: 2793; AVX512: # %bb.0: 2794; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2795; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 2796; AVX512-NEXT: retq 2797 %cvt = uitofp <8 x i16> %a to <8 x float> 2798 ret <8 x float> %cvt 2799} 2800 2801define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) { 2802; SSE2-LABEL: uitofp_8i8_to_8f32: 2803; SSE2: # %bb.0: 2804; SSE2-NEXT: pxor %xmm1, %xmm1 2805; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2806; SSE2-NEXT: movdqa %xmm0, %xmm2 2807; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2808; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 2809; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2810; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 2811; SSE2-NEXT: movaps %xmm2, %xmm0 2812; SSE2-NEXT: retq 2813; 2814; SSE41-LABEL: uitofp_8i8_to_8f32: 2815; SSE41: # %bb.0: 2816; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2817; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 2818; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2819; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2820; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 2821; SSE41-NEXT: movaps %xmm2, %xmm0 2822; SSE41-NEXT: retq 2823; 2824; AVX1-LABEL: uitofp_8i8_to_8f32: 2825; AVX1: # %bb.0: 2826; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2827; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2828; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2829; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2830; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2831; AVX1-NEXT: retq 2832; 2833; AVX2-LABEL: uitofp_8i8_to_8f32: 2834; AVX2: # %bb.0: 2835; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2836; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2837; AVX2-NEXT: retq 2838; 2839; AVX512-LABEL: uitofp_8i8_to_8f32: 2840; AVX512: # %bb.0: 2841; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2842; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 2843; AVX512-NEXT: retq 2844 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2845 %cvt = uitofp <8 x i8> %shuf to <8 x float> 2846 ret <8 x float> %cvt 2847} 2848 2849define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) { 2850; SSE2-LABEL: uitofp_16i8_to_8f32: 2851; SSE2: # %bb.0: 2852; SSE2-NEXT: pxor %xmm1, %xmm1 2853; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2854; SSE2-NEXT: movdqa %xmm0, %xmm2 2855; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2856; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 2857; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2858; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 2859; SSE2-NEXT: movaps %xmm2, %xmm0 2860; SSE2-NEXT: retq 2861; 2862; SSE41-LABEL: uitofp_16i8_to_8f32: 2863; SSE41: # %bb.0: 2864; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2865; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 2866; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2867; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2868; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 2869; SSE41-NEXT: movaps %xmm2, %xmm0 2870; SSE41-NEXT: retq 2871; 2872; AVX1-LABEL: uitofp_16i8_to_8f32: 2873; AVX1: # %bb.0: 2874; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2875; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2876; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2877; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2878; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2879; AVX1-NEXT: retq 2880; 2881; AVX2-LABEL: uitofp_16i8_to_8f32: 2882; AVX2: # %bb.0: 2883; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2884; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2885; AVX2-NEXT: retq 2886; 2887; AVX512-LABEL: uitofp_16i8_to_8f32: 2888; AVX512: # %bb.0: 2889; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2890; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 2891; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2892; AVX512-NEXT: retq 2893 %cvt = uitofp <16 x i8> %a to <16 x float> 2894 %shuf = shufflevector <16 x float> %cvt, <16 
x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2895 ret <8 x float> %shuf 2896} 2897 2898; 2899; Load Signed Integer to Double 2900; 2901 2902define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { 2903; SSE2-LABEL: sitofp_load_2i64_to_2f64: 2904; SSE2: # %bb.0: 2905; SSE2-NEXT: movdqa (%rdi), %xmm1 2906; SSE2-NEXT: movq %xmm1, %rax 2907; SSE2-NEXT: cvtsi2sd %rax, %xmm0 2908; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 2909; SSE2-NEXT: movq %xmm1, %rax 2910; SSE2-NEXT: xorps %xmm1, %xmm1 2911; SSE2-NEXT: cvtsi2sd %rax, %xmm1 2912; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2913; SSE2-NEXT: retq 2914; 2915; SSE41-LABEL: sitofp_load_2i64_to_2f64: 2916; SSE41: # %bb.0: 2917; SSE41-NEXT: movdqa (%rdi), %xmm0 2918; SSE41-NEXT: pextrq $1, %xmm0, %rax 2919; SSE41-NEXT: cvtsi2sd %rax, %xmm1 2920; SSE41-NEXT: movq %xmm0, %rax 2921; SSE41-NEXT: xorps %xmm0, %xmm0 2922; SSE41-NEXT: cvtsi2sd %rax, %xmm0 2923; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2924; SSE41-NEXT: retq 2925; 2926; VEX-LABEL: sitofp_load_2i64_to_2f64: 2927; VEX: # %bb.0: 2928; VEX-NEXT: vmovdqa (%rdi), %xmm0 2929; VEX-NEXT: vpextrq $1, %xmm0, %rax 2930; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 2931; VEX-NEXT: vmovq %xmm0, %rax 2932; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 2933; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2934; VEX-NEXT: retq 2935; 2936; AVX512F-LABEL: sitofp_load_2i64_to_2f64: 2937; AVX512F: # %bb.0: 2938; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 2939; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2940; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 2941; AVX512F-NEXT: vmovq %xmm0, %rax 2942; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 2943; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2944; AVX512F-NEXT: retq 2945; 2946; AVX512VL-LABEL: sitofp_load_2i64_to_2f64: 2947; AVX512VL: # %bb.0: 2948; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 2949; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2950; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 
2951; AVX512VL-NEXT: vmovq %xmm0, %rax 2952; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 2953; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2954; AVX512VL-NEXT: retq 2955; 2956; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: 2957; AVX512DQ: # %bb.0: 2958; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 2959; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 2960; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2961; AVX512DQ-NEXT: vzeroupper 2962; AVX512DQ-NEXT: retq 2963; 2964; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64: 2965; AVX512VLDQ: # %bb.0: 2966; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %xmm0 2967; AVX512VLDQ-NEXT: retq 2968 %ld = load <2 x i64>, <2 x i64> *%a 2969 %cvt = sitofp <2 x i64> %ld to <2 x double> 2970 ret <2 x double> %cvt 2971} 2972 2973define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) { 2974; SSE-LABEL: sitofp_load_2i32_to_2f64: 2975; SSE: # %bb.0: 2976; SSE-NEXT: cvtdq2pd (%rdi), %xmm0 2977; SSE-NEXT: retq 2978; 2979; AVX-LABEL: sitofp_load_2i32_to_2f64: 2980; AVX: # %bb.0: 2981; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0 2982; AVX-NEXT: retq 2983 %ld = load <2 x i32>, <2 x i32> *%a 2984 %cvt = sitofp <2 x i32> %ld to <2 x double> 2985 ret <2 x double> %cvt 2986} 2987 2988define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) { 2989; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64: 2990; SSE: # %bb.0: 2991; SSE-NEXT: movaps (%rdi), %xmm0 2992; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 2993; SSE-NEXT: retq 2994; 2995; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64: 2996; AVX: # %bb.0: 2997; AVX-NEXT: vmovaps (%rdi), %xmm0 2998; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 2999; AVX-NEXT: retq 3000 %ld = load volatile <4 x i32>, <4 x i32> *%a 3001 %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 3002 %cvt = sitofp <2 x i32> %b to <2 x double> 3003 ret <2 x double> %cvt 3004} 3005 3006define <2 x double> @sitofp_load_4i32_to_2f64_2(<4 x i32>* %x) { 3007; SSE-LABEL: sitofp_load_4i32_to_2f64_2: 3008; SSE: # %bb.0: 3009; SSE-NEXT: cvtdq2pd 
(%rdi), %xmm0 3010; SSE-NEXT: retq 3011; 3012; AVX-LABEL: sitofp_load_4i32_to_2f64_2: 3013; AVX: # %bb.0: 3014; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0 3015; AVX-NEXT: retq 3016 %a = load <4 x i32>, <4 x i32>* %x 3017 %b = sitofp <4 x i32> %a to <4 x double> 3018 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 3019 ret <2 x double> %c 3020} 3021 3022define <2 x double> @sitofp_volatile_load_4i32_to_2f64_2(<4 x i32>* %x) { 3023; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64_2: 3024; SSE: # %bb.0: 3025; SSE-NEXT: movaps (%rdi), %xmm0 3026; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 3027; SSE-NEXT: retq 3028; 3029; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64_2: 3030; AVX: # %bb.0: 3031; AVX-NEXT: vmovaps (%rdi), %xmm0 3032; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3033; AVX-NEXT: retq 3034 %a = load volatile <4 x i32>, <4 x i32>* %x 3035 %b = sitofp <4 x i32> %a to <4 x double> 3036 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 3037 ret <2 x double> %c 3038} 3039 3040define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) { 3041; SSE2-LABEL: sitofp_load_2i16_to_2f64: 3042; SSE2: # %bb.0: 3043; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3044; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 3045; SSE2-NEXT: psrad $16, %xmm0 3046; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 3047; SSE2-NEXT: retq 3048; 3049; SSE41-LABEL: sitofp_load_2i16_to_2f64: 3050; SSE41: # %bb.0: 3051; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3052; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 3053; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 3054; SSE41-NEXT: retq 3055; 3056; AVX-LABEL: sitofp_load_2i16_to_2f64: 3057; AVX: # %bb.0: 3058; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3059; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 3060; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3061; AVX-NEXT: retq 3062 %ld = load <2 x i16>, <2 x i16> *%a 3063 %cvt = sitofp <2 x i16> %ld to <2 x double> 3064 ret <2 x double> %cvt 3065} 3066 3067define <2 x double> 
@sitofp_load_2i8_to_2f64(<2 x i8> *%a) { 3068; SSE2-LABEL: sitofp_load_2i8_to_2f64: 3069; SSE2: # %bb.0: 3070; SSE2-NEXT: movzwl (%rdi), %eax 3071; SSE2-NEXT: movd %eax, %xmm0 3072; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3073; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 3074; SSE2-NEXT: psrad $24, %xmm0 3075; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 3076; SSE2-NEXT: retq 3077; 3078; SSE41-LABEL: sitofp_load_2i8_to_2f64: 3079; SSE41: # %bb.0: 3080; SSE41-NEXT: movzwl (%rdi), %eax 3081; SSE41-NEXT: movd %eax, %xmm0 3082; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 3083; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 3084; SSE41-NEXT: retq 3085; 3086; AVX-LABEL: sitofp_load_2i8_to_2f64: 3087; AVX: # %bb.0: 3088; AVX-NEXT: movzwl (%rdi), %eax 3089; AVX-NEXT: vmovd %eax, %xmm0 3090; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 3091; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3092; AVX-NEXT: retq 3093 %ld = load <2 x i8>, <2 x i8> *%a 3094 %cvt = sitofp <2 x i8> %ld to <2 x double> 3095 ret <2 x double> %cvt 3096} 3097 3098define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { 3099; SSE2-LABEL: sitofp_load_4i64_to_4f64: 3100; SSE2: # %bb.0: 3101; SSE2-NEXT: movdqa (%rdi), %xmm1 3102; SSE2-NEXT: movdqa 16(%rdi), %xmm2 3103; SSE2-NEXT: movq %xmm1, %rax 3104; SSE2-NEXT: cvtsi2sd %rax, %xmm0 3105; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3106; SSE2-NEXT: movq %xmm1, %rax 3107; SSE2-NEXT: xorps %xmm1, %xmm1 3108; SSE2-NEXT: cvtsi2sd %rax, %xmm1 3109; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3110; SSE2-NEXT: movq %xmm2, %rax 3111; SSE2-NEXT: xorps %xmm1, %xmm1 3112; SSE2-NEXT: cvtsi2sd %rax, %xmm1 3113; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 3114; SSE2-NEXT: movq %xmm2, %rax 3115; SSE2-NEXT: xorps %xmm2, %xmm2 3116; SSE2-NEXT: cvtsi2sd %rax, %xmm2 3117; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3118; SSE2-NEXT: retq 3119; 3120; SSE41-LABEL: sitofp_load_4i64_to_4f64: 3121; SSE41: # %bb.0: 3122; SSE41-NEXT: movdqa (%rdi), %xmm0 3123; 
SSE41-NEXT: movdqa 16(%rdi), %xmm1 3124; SSE41-NEXT: pextrq $1, %xmm0, %rax 3125; SSE41-NEXT: cvtsi2sd %rax, %xmm2 3126; SSE41-NEXT: movq %xmm0, %rax 3127; SSE41-NEXT: xorps %xmm0, %xmm0 3128; SSE41-NEXT: cvtsi2sd %rax, %xmm0 3129; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3130; SSE41-NEXT: pextrq $1, %xmm1, %rax 3131; SSE41-NEXT: xorps %xmm2, %xmm2 3132; SSE41-NEXT: cvtsi2sd %rax, %xmm2 3133; SSE41-NEXT: movq %xmm1, %rax 3134; SSE41-NEXT: xorps %xmm1, %xmm1 3135; SSE41-NEXT: cvtsi2sd %rax, %xmm1 3136; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3137; SSE41-NEXT: retq 3138; 3139; VEX-LABEL: sitofp_load_4i64_to_4f64: 3140; VEX: # %bb.0: 3141; VEX-NEXT: vmovapd (%rdi), %xmm0 3142; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 3143; VEX-NEXT: vpextrq $1, %xmm1, %rax 3144; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 3145; VEX-NEXT: vmovq %xmm1, %rax 3146; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 3147; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3148; VEX-NEXT: vpextrq $1, %xmm0, %rax 3149; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 3150; VEX-NEXT: vmovq %xmm0, %rax 3151; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 3152; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3153; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3154; VEX-NEXT: retq 3155; 3156; AVX512F-LABEL: sitofp_load_4i64_to_4f64: 3157; AVX512F: # %bb.0: 3158; AVX512F-NEXT: vmovapd (%rdi), %xmm0 3159; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 3160; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 3161; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 3162; AVX512F-NEXT: vmovq %xmm1, %rax 3163; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 3164; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3165; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 3166; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 3167; AVX512F-NEXT: vmovq %xmm0, %rax 3168; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 3169; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3170; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3171; AVX512F-NEXT: retq 3172; 3173; 
AVX512VL-LABEL: sitofp_load_4i64_to_4f64: 3174; AVX512VL: # %bb.0: 3175; AVX512VL-NEXT: vmovapd (%rdi), %xmm0 3176; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 3177; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax 3178; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 3179; AVX512VL-NEXT: vmovq %xmm1, %rax 3180; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 3181; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3182; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 3183; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 3184; AVX512VL-NEXT: vmovq %xmm0, %rax 3185; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 3186; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3187; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3188; AVX512VL-NEXT: retq 3189; 3190; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64: 3191; AVX512DQ: # %bb.0: 3192; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 3193; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 3194; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3195; AVX512DQ-NEXT: retq 3196; 3197; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64: 3198; AVX512VLDQ: # %bb.0: 3199; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %ymm0 3200; AVX512VLDQ-NEXT: retq 3201 %ld = load <4 x i64>, <4 x i64> *%a 3202 %cvt = sitofp <4 x i64> %ld to <4 x double> 3203 ret <4 x double> %cvt 3204} 3205 3206define <4 x double> @sitofp_load_4i32_to_4f64(<4 x i32> *%a) { 3207; SSE-LABEL: sitofp_load_4i32_to_4f64: 3208; SSE: # %bb.0: 3209; SSE-NEXT: movdqa (%rdi), %xmm1 3210; SSE-NEXT: cvtdq2pd %xmm1, %xmm0 3211; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3212; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 3213; SSE-NEXT: retq 3214; 3215; AVX-LABEL: sitofp_load_4i32_to_4f64: 3216; AVX: # %bb.0: 3217; AVX-NEXT: vcvtdq2pd (%rdi), %ymm0 3218; AVX-NEXT: retq 3219 %ld = load <4 x i32>, <4 x i32> *%a 3220 %cvt = sitofp <4 x i32> %ld to <4 x double> 3221 ret <4 x double> %cvt 3222} 3223 3224define <4 x double> @sitofp_load_4i16_to_4f64(<4 x i16> *%a) { 3225; SSE2-LABEL: sitofp_load_4i16_to_4f64: 3226; SSE2: # %bb.0: 3227; SSE2-NEXT: 
movq {{.*#+}} xmm0 = mem[0],zero 3228; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3229; SSE2-NEXT: psrad $16, %xmm1 3230; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3231; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3232; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3233; SSE2-NEXT: retq 3234; 3235; SSE41-LABEL: sitofp_load_4i16_to_4f64: 3236; SSE41: # %bb.0: 3237; SSE41-NEXT: pmovsxwd (%rdi), %xmm1 3238; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3239; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3240; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3241; SSE41-NEXT: retq 3242; 3243; AVX-LABEL: sitofp_load_4i16_to_4f64: 3244; AVX: # %bb.0: 3245; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 3246; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3247; AVX-NEXT: retq 3248 %ld = load <4 x i16>, <4 x i16> *%a 3249 %cvt = sitofp <4 x i16> %ld to <4 x double> 3250 ret <4 x double> %cvt 3251} 3252 3253define <4 x double> @sitofp_load_4i8_to_4f64(<4 x i8> *%a) { 3254; SSE2-LABEL: sitofp_load_4i8_to_4f64: 3255; SSE2: # %bb.0: 3256; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3257; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3258; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3259; SSE2-NEXT: psrad $24, %xmm1 3260; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3261; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3262; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3263; SSE2-NEXT: retq 3264; 3265; SSE41-LABEL: sitofp_load_4i8_to_4f64: 3266; SSE41: # %bb.0: 3267; SSE41-NEXT: pmovsxbd (%rdi), %xmm1 3268; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3269; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3270; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3271; SSE41-NEXT: retq 3272; 3273; AVX-LABEL: sitofp_load_4i8_to_4f64: 3274; AVX: # %bb.0: 3275; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 3276; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3277; AVX-NEXT: retq 3278 %ld = load <4 x i8>, <4 x i8> *%a 3279 %cvt = sitofp <4 x i8> %ld to <4 x double> 3280 ret <4 
x double> %cvt 3281} 3282 3283; 3284; Load Unsigned Integer to Double 3285; 3286 3287define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) { 3288; SSE2-LABEL: uitofp_load_2i64_to_2f64: 3289; SSE2: # %bb.0: 3290; SSE2-NEXT: movdqa (%rdi), %xmm0 3291; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295] 3292; SSE2-NEXT: pand %xmm0, %xmm1 3293; SSE2-NEXT: por {{.*}}(%rip), %xmm1 3294; SSE2-NEXT: psrlq $32, %xmm0 3295; SSE2-NEXT: por {{.*}}(%rip), %xmm0 3296; SSE2-NEXT: subpd {{.*}}(%rip), %xmm0 3297; SSE2-NEXT: addpd %xmm1, %xmm0 3298; SSE2-NEXT: retq 3299; 3300; SSE41-LABEL: uitofp_load_2i64_to_2f64: 3301; SSE41: # %bb.0: 3302; SSE41-NEXT: movdqa (%rdi), %xmm0 3303; SSE41-NEXT: pxor %xmm1, %xmm1 3304; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 3305; SSE41-NEXT: por {{.*}}(%rip), %xmm1 3306; SSE41-NEXT: psrlq $32, %xmm0 3307; SSE41-NEXT: por {{.*}}(%rip), %xmm0 3308; SSE41-NEXT: subpd {{.*}}(%rip), %xmm0 3309; SSE41-NEXT: addpd %xmm1, %xmm0 3310; SSE41-NEXT: retq 3311; 3312; AVX1-LABEL: uitofp_load_2i64_to_2f64: 3313; AVX1: # %bb.0: 3314; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3315; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 3316; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 3317; AVX1-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 3318; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 3319; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 3320; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 3321; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3322; AVX1-NEXT: retq 3323; 3324; AVX2-LABEL: uitofp_load_2i64_to_2f64: 3325; AVX2: # %bb.0: 3326; AVX2-NEXT: vmovdqa (%rdi), %xmm0 3327; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 3328; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 3329; AVX2-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 3330; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 3331; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 3332; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 3333; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3334; AVX2-NEXT: retq 
3335; 3336; AVX512F-LABEL: uitofp_load_2i64_to_2f64: 3337; AVX512F: # %bb.0: 3338; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 3339; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 3340; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 3341; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 3342; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 3343; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 3344; AVX512F-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 3345; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3346; AVX512F-NEXT: retq 3347; 3348; AVX512VL-LABEL: uitofp_load_2i64_to_2f64: 3349; AVX512VL: # %bb.0: 3350; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 3351; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 3352; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 3353; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 3354; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 3355; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 3356; AVX512VL-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 3357; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3358; AVX512VL-NEXT: retq 3359; 3360; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64: 3361; AVX512DQ: # %bb.0: 3362; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3363; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 3364; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3365; AVX512DQ-NEXT: vzeroupper 3366; AVX512DQ-NEXT: retq 3367; 3368; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64: 3369; AVX512VLDQ: # %bb.0: 3370; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %xmm0 3371; AVX512VLDQ-NEXT: retq 3372 %ld = load <2 x i64>, <2 x i64> *%a 3373 %cvt = uitofp <2 x i64> %ld to <2 x double> 3374 ret <2 x double> %cvt 3375} 3376 3377define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) { 3378; SSE2-LABEL: uitofp_load_2i32_to_2f64: 3379; SSE2: # %bb.0: 3380; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3381; SSE2-NEXT: xorpd %xmm1, %xmm1 3382; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3383; SSE2-NEXT: movapd {{.*#+}} xmm1 = 
[4.503599627370496E+15,4.503599627370496E+15] 3384; SSE2-NEXT: orpd %xmm1, %xmm0 3385; SSE2-NEXT: subpd %xmm1, %xmm0 3386; SSE2-NEXT: retq 3387; 3388; SSE41-LABEL: uitofp_load_2i32_to_2f64: 3389; SSE41: # %bb.0: 3390; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3391; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3392; SSE41-NEXT: por %xmm1, %xmm0 3393; SSE41-NEXT: subpd %xmm1, %xmm0 3394; SSE41-NEXT: retq 3395; 3396; VEX-LABEL: uitofp_load_2i32_to_2f64: 3397; VEX: # %bb.0: 3398; VEX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3399; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3400; VEX-NEXT: vpor %xmm1, %xmm0, %xmm0 3401; VEX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3402; VEX-NEXT: retq 3403; 3404; AVX512F-LABEL: uitofp_load_2i32_to_2f64: 3405; AVX512F: # %bb.0: 3406; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3407; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3408; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3409; AVX512F-NEXT: vzeroupper 3410; AVX512F-NEXT: retq 3411; 3412; AVX512VL-LABEL: uitofp_load_2i32_to_2f64: 3413; AVX512VL: # %bb.0: 3414; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0 3415; AVX512VL-NEXT: retq 3416; 3417; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64: 3418; AVX512DQ: # %bb.0: 3419; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3420; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3421; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3422; AVX512DQ-NEXT: vzeroupper 3423; AVX512DQ-NEXT: retq 3424; 3425; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64: 3426; AVX512VLDQ: # %bb.0: 3427; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 3428; AVX512VLDQ-NEXT: retq 3429 %ld = load <2 x i32>, <2 x i32> *%a 3430 %cvt = uitofp <2 x i32> %ld to <2 x double> 3431 ret <2 x double> %cvt 3432} 3433 3434define <2 x double> @uitofp_load_4i32_to_2f64_2(<4 x i32>* %x) { 3435; SSE2-LABEL: uitofp_load_4i32_to_2f64_2: 3436; SSE2: # %bb.0: 3437; SSE2-NEXT: movapd (%rdi), %xmm0 
3438; SSE2-NEXT: xorpd %xmm1, %xmm1 3439; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3440; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3441; SSE2-NEXT: orpd %xmm1, %xmm0 3442; SSE2-NEXT: subpd %xmm1, %xmm0 3443; SSE2-NEXT: retq 3444; 3445; SSE41-LABEL: uitofp_load_4i32_to_2f64_2: 3446; SSE41: # %bb.0: 3447; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3448; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3449; SSE41-NEXT: por %xmm1, %xmm0 3450; SSE41-NEXT: subpd %xmm1, %xmm0 3451; SSE41-NEXT: retq 3452; 3453; AVX1-LABEL: uitofp_load_4i32_to_2f64_2: 3454; AVX1: # %bb.0: 3455; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3456; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3457; AVX1-NEXT: # xmm1 = mem[0,0] 3458; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 3459; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3460; AVX1-NEXT: retq 3461; 3462; AVX2-LABEL: uitofp_load_4i32_to_2f64_2: 3463; AVX2: # %bb.0: 3464; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3465; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3466; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 3467; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3468; AVX2-NEXT: vzeroupper 3469; AVX2-NEXT: retq 3470; 3471; AVX512F-LABEL: uitofp_load_4i32_to_2f64_2: 3472; AVX512F: # %bb.0: 3473; AVX512F-NEXT: vmovaps (%rdi), %xmm0 3474; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3475; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3476; AVX512F-NEXT: vzeroupper 3477; AVX512F-NEXT: retq 3478; 3479; AVX512VL-LABEL: uitofp_load_4i32_to_2f64_2: 3480; AVX512VL: # %bb.0: 3481; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0 3482; AVX512VL-NEXT: retq 3483; 3484; AVX512DQ-LABEL: uitofp_load_4i32_to_2f64_2: 3485; AVX512DQ: # %bb.0: 3486; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3487; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3488; 
AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3489; AVX512DQ-NEXT: vzeroupper 3490; AVX512DQ-NEXT: retq 3491; 3492; AVX512VLDQ-LABEL: uitofp_load_4i32_to_2f64_2: 3493; AVX512VLDQ: # %bb.0: 3494; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 3495; AVX512VLDQ-NEXT: retq 3496 %a = load <4 x i32>, <4 x i32>* %x 3497 %b = uitofp <4 x i32> %a to <4 x double> 3498 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 3499 ret <2 x double> %c 3500} 3501; Volatile-load variant of uitofp_load_4i32_to_2f64_2: the <4 x i32> load is volatile and only the two low uitofp lanes are returned. In the avx512vl configurations the expected code keeps a separate vmovaps load and converts from a register, whereas the non-volatile test folds the memory operand into vcvtudq2pd. 3502define <2 x double> @uitofp_volatile_load_4i32_to_2f64_2(<4 x i32>* %x) { 3503; SSE2-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3504; SSE2: # %bb.0: 3505; SSE2-NEXT: movapd (%rdi), %xmm0 3506; SSE2-NEXT: xorpd %xmm1, %xmm1 3507; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3508; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3509; SSE2-NEXT: orpd %xmm1, %xmm0 3510; SSE2-NEXT: subpd %xmm1, %xmm0 3511; SSE2-NEXT: retq 3512; 3513; SSE41-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3514; SSE41: # %bb.0: 3515; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3516; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3517; SSE41-NEXT: por %xmm1, %xmm0 3518; SSE41-NEXT: subpd %xmm1, %xmm0 3519; SSE41-NEXT: retq 3520; 3521; AVX1-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3522; AVX1: # %bb.0: 3523; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3524; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3525; AVX1-NEXT: # xmm1 = mem[0,0] 3526; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 3527; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3528; AVX1-NEXT: retq 3529; 3530; AVX2-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3531; AVX2: # %bb.0: 3532; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3533; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3534; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 3535; 
AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3536; AVX2-NEXT: vzeroupper 3537; AVX2-NEXT: retq 3538; 3539; AVX512F-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3540; AVX512F: # %bb.0: 3541; AVX512F-NEXT: vmovaps (%rdi), %xmm0 3542; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3543; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3544; AVX512F-NEXT: vzeroupper 3545; AVX512F-NEXT: retq 3546; 3547; AVX512VL-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3548; AVX512VL: # %bb.0: 3549; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 3550; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 3551; AVX512VL-NEXT: retq 3552; 3553; AVX512DQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3554; AVX512DQ: # %bb.0: 3555; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3556; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3557; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3558; AVX512DQ-NEXT: vzeroupper 3559; AVX512DQ-NEXT: retq 3560; 3561; AVX512VLDQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3562; AVX512VLDQ: # %bb.0: 3563; AVX512VLDQ-NEXT: vmovaps (%rdi), %xmm0 3564; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 3565; AVX512VLDQ-NEXT: retq 3566 %a = load volatile <4 x i32>, <4 x i32>* %x 3567 %b = uitofp <4 x i32> %a to <4 x double> 3568 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 3569 ret <2 x double> %c 3570} 3571 3572define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) { 3573; SSE2-LABEL: uitofp_load_2i16_to_2f64: 3574; SSE2: # %bb.0: 3575; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3576; SSE2-NEXT: pxor %xmm1, %xmm1 3577; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3578; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 3579; SSE2-NEXT: retq 3580; 3581; SSE41-LABEL: uitofp_load_2i16_to_2f64: 3582; SSE41: # %bb.0: 3583; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3584; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 3585; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 3586; SSE41-NEXT: 
retq 3587; 3588; AVX-LABEL: uitofp_load_2i16_to_2f64: 3589; AVX: # %bb.0: 3590; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3591; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 3592; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3593; AVX-NEXT: retq 3594 %ld = load <2 x i16>, <2 x i16> *%a 3595 %cvt = uitofp <2 x i16> %ld to <2 x double> 3596 ret <2 x double> %cvt 3597} 3598; Load <2 x i8> from %a and convert it with uitofp to <2 x double>. The expected code reads both bytes with one scalar movzwl, zero-extends them to 32-bit lanes and converts with (v)cvtdq2pd. 3599define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) { 3600; SSE2-LABEL: uitofp_load_2i8_to_2f64: 3601; SSE2: # %bb.0: 3602; SSE2-NEXT: movzwl (%rdi), %eax 3603; SSE2-NEXT: movd %eax, %xmm0 3604; SSE2-NEXT: pxor %xmm1, %xmm1 3605; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3606; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3607; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 3608; SSE2-NEXT: retq 3609; 3610; SSE41-LABEL: uitofp_load_2i8_to_2f64: 3611; SSE41: # %bb.0: 3612; SSE41-NEXT: movzwl (%rdi), %eax 3613; SSE41-NEXT: movd %eax, %xmm0 3614; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3615; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 3616; SSE41-NEXT: retq 3617; 3618; AVX-LABEL: uitofp_load_2i8_to_2f64: 3619; AVX: # %bb.0: 3620; AVX-NEXT: movzwl (%rdi), %eax 3621; AVX-NEXT: vmovd %eax, %xmm0 3622; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3623; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3624; AVX-NEXT: retq 3625 %ld = load <2 x i8>, <2 x i8> *%a 3626 %cvt = uitofp <2 x i8> %ld to <2 x double> 3627 ret <2 x double> %cvt 3628} 3629; Load <4 x i64> from %a and convert it with uitofp to <4 x double>. Only the avx512dq configurations are expected to use a direct vcvtuqq2pd; the other configurations check an expanded mask/shift/or/subpd/addpd sequence. 3630define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { 3631; SSE2-LABEL: uitofp_load_4i64_to_4f64: 3632; SSE2: # %bb.0: 3633; SSE2-NEXT: movdqa (%rdi), %xmm0 3634; SSE2-NEXT: movdqa 16(%rdi), %xmm1 
3635; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295] 3636; SSE2-NEXT: movdqa %xmm0, %xmm3 3637; SSE2-NEXT: pand %xmm2, %xmm3 3638; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] 3639; SSE2-NEXT: por %xmm4, %xmm3 3640; SSE2-NEXT: psrlq $32, %xmm0 3641; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] 3642; SSE2-NEXT: por %xmm5, %xmm0 3643; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] 3644; SSE2-NEXT: subpd %xmm6, %xmm0 3645; SSE2-NEXT: addpd %xmm3, %xmm0 3646; SSE2-NEXT: pand %xmm1, %xmm2 3647; SSE2-NEXT: por %xmm4, %xmm2 3648; SSE2-NEXT: psrlq $32, %xmm1 3649; SSE2-NEXT: por %xmm5, %xmm1 3650; SSE2-NEXT: subpd %xmm6, %xmm1 3651; SSE2-NEXT: addpd %xmm2, %xmm1 3652; SSE2-NEXT: retq 3653; 3654; SSE41-LABEL: uitofp_load_4i64_to_4f64: 3655; SSE41: # %bb.0: 3656; SSE41-NEXT: movdqa (%rdi), %xmm0 3657; SSE41-NEXT: movdqa 16(%rdi), %xmm1 3658; SSE41-NEXT: pxor %xmm2, %xmm2 3659; SSE41-NEXT: movdqa %xmm0, %xmm3 3660; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 3661; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] 3662; SSE41-NEXT: por %xmm4, %xmm3 3663; SSE41-NEXT: psrlq $32, %xmm0 3664; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] 3665; SSE41-NEXT: por %xmm5, %xmm0 3666; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] 3667; SSE41-NEXT: subpd %xmm6, %xmm0 3668; SSE41-NEXT: addpd %xmm3, %xmm0 3669; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 3670; SSE41-NEXT: por %xmm4, %xmm2 3671; SSE41-NEXT: psrlq $32, %xmm1 3672; SSE41-NEXT: por %xmm5, %xmm1 3673; SSE41-NEXT: subpd %xmm6, %xmm1 3674; SSE41-NEXT: addpd %xmm2, %xmm1 3675; SSE41-NEXT: retq 3676; 3677; AVX1-LABEL: uitofp_load_4i64_to_4f64: 3678; AVX1: # %bb.0: 3679; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 3680; AVX1-NEXT: vblendps {{.*#+}} ymm0 = 
mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7] 3681; AVX1-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0 3682; AVX1-NEXT: vmovdqa (%rdi), %xmm1 3683; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 3684; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 3685; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2 3686; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 3687; AVX1-NEXT: vorpd {{.*}}(%rip), %ymm1, %ymm1 3688; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm1, %ymm1 3689; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 3690; AVX1-NEXT: retq 3691; 3692; AVX2-LABEL: uitofp_load_4i64_to_4f64: 3693; AVX2: # %bb.0: 3694; AVX2-NEXT: vmovdqa (%rdi), %ymm0 3695; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 3696; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3697; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 3698; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 3699; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 3700; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 3701; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 3702; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 3703; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0 3704; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 3705; AVX2-NEXT: retq 3706; 3707; AVX512F-LABEL: uitofp_load_4i64_to_4f64: 3708; AVX512F: # %bb.0: 3709; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 3710; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 3711; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3712; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 3713; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1 3714; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0 3715; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
[4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 3716; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 3717; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 3718; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0 3719; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0 3720; AVX512F-NEXT: retq 3721; 3722; AVX512VL-LABEL: uitofp_load_4i64_to_4f64: 3723; AVX512VL: # %bb.0: 3724; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 3725; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 3726; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3727; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm1, %ymm1 3728; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 3729; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm0, %ymm0 3730; AVX512VL-NEXT: vsubpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 3731; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 3732; AVX512VL-NEXT: retq 3733; 3734; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64: 3735; AVX512DQ: # %bb.0: 3736; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 3737; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 3738; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3739; AVX512DQ-NEXT: retq 3740; 3741; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64: 3742; AVX512VLDQ: # %bb.0: 3743; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %ymm0 3744; AVX512VLDQ-NEXT: retq 3745 %ld = load <4 x i64>, <4 x i64> *%a 3746 %cvt = uitofp <4 x i64> %ld to <4 x double> 3747 ret <4 x double> %cvt 3748} 3749 3750define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) { 3751; SSE2-LABEL: uitofp_load_4i32_to_4f64: 3752; SSE2: # %bb.0: 3753; SSE2-NEXT: movapd (%rdi), %xmm1 3754; SSE2-NEXT: xorpd %xmm2, %xmm2 3755; SSE2-NEXT: movapd %xmm1, %xmm0 3756; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3757; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] 3758; SSE2-NEXT: orpd %xmm3, %xmm0 3759; SSE2-NEXT: subpd %xmm3, %xmm0 3760; 
SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3761; SSE2-NEXT: orpd %xmm3, %xmm1 3762; SSE2-NEXT: subpd %xmm3, %xmm1 3763; SSE2-NEXT: retq 3764; 3765; SSE41-LABEL: uitofp_load_4i32_to_4f64: 3766; SSE41: # %bb.0: 3767; SSE41-NEXT: movdqa (%rdi), %xmm1 3768; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 3769; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] 3770; SSE41-NEXT: por %xmm2, %xmm0 3771; SSE41-NEXT: subpd %xmm2, %xmm0 3772; SSE41-NEXT: pxor %xmm3, %xmm3 3773; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3774; SSE41-NEXT: por %xmm2, %xmm1 3775; SSE41-NEXT: subpd %xmm2, %xmm1 3776; SSE41-NEXT: retq 3777; 3778; AVX1-LABEL: uitofp_load_4i32_to_4f64: 3779; AVX1: # %bb.0: 3780; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3781; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 3782; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3783; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 3784; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3785; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 3786; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 3787; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 3788; AVX1-NEXT: retq 3789; 3790; AVX2-LABEL: uitofp_load_4i32_to_4f64: 3791; AVX2: # %bb.0: 3792; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3793; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 3794; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 3795; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 3796; AVX2-NEXT: retq 3797; 3798; AVX512F-LABEL: uitofp_load_4i32_to_4f64: 3799; AVX512F: # %bb.0: 3800; AVX512F-NEXT: vmovaps (%rdi), %xmm0 3801; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3802; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3803; AVX512F-NEXT: retq 3804; 3805; AVX512VL-LABEL: 
uitofp_load_4i32_to_4f64: 3806; AVX512VL: # %bb.0: 3807; AVX512VL-NEXT: vcvtudq2pd (%rdi), %ymm0 3808; AVX512VL-NEXT: retq 3809; 3810; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64: 3811; AVX512DQ: # %bb.0: 3812; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3813; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3814; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3815; AVX512DQ-NEXT: retq 3816; 3817; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64: 3818; AVX512VLDQ: # %bb.0: 3819; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %ymm0 3820; AVX512VLDQ-NEXT: retq 3821 %ld = load <4 x i32>, <4 x i32> *%a 3822 %cvt = uitofp <4 x i32> %ld to <4 x double> 3823 ret <4 x double> %cvt 3824} 3825; Load <4 x i16> from %a and convert it with uitofp to <4 x double>. The expected code zero-extends the elements to 32-bit lanes and converts with (v)cvtdq2pd; the sse checks return the result in xmm0 and xmm1 via two conversions, the avx checks use a single conversion into ymm0. 3826define <4 x double> @uitofp_load_4i16_to_4f64(<4 x i16> *%a) { 3827; SSE2-LABEL: uitofp_load_4i16_to_4f64: 3828; SSE2: # %bb.0: 3829; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3830; SSE2-NEXT: pxor %xmm0, %xmm0 3831; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3832; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3833; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3834; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3835; SSE2-NEXT: retq 3836; 3837; SSE41-LABEL: uitofp_load_4i16_to_4f64: 3838; SSE41: # %bb.0: 3839; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3840; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3841; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3842; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3843; SSE41-NEXT: retq 3844; 3845; AVX-LABEL: uitofp_load_4i16_to_4f64: 3846; AVX: # %bb.0: 3847; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3848; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3849; AVX-NEXT: retq 3850 %ld = load <4 x i16>, <4 x i16> *%a 3851 %cvt = uitofp <4 x i16> %ld to <4 x double> 3852 ret <4 x double> %cvt 3853} 3854; Load <4 x i8> from %a and convert it with uitofp to <4 x double>. As with the i16 test, the expected code zero-extends to 32-bit lanes before converting with (v)cvtdq2pd. 3855define <4 x double> @uitofp_load_4i8_to_4f64(<4 x i8> *%a) { 3856; SSE2-LABEL: uitofp_load_4i8_to_4f64: 3857; SSE2: # %bb.0: 3858; SSE2-NEXT: movd {{.*#+}} xmm1 = 
mem[0],zero,zero,zero 3859; SSE2-NEXT: pxor %xmm0, %xmm0 3860; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3861; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3862; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3863; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3864; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3865; SSE2-NEXT: retq 3866; 3867; SSE41-LABEL: uitofp_load_4i8_to_4f64: 3868; SSE41: # %bb.0: 3869; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 3870; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3871; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3872; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3873; SSE41-NEXT: retq 3874; 3875; AVX-LABEL: uitofp_load_4i8_to_4f64: 3876; AVX: # %bb.0: 3877; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 3878; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3879; AVX-NEXT: retq 3880 %ld = load <4 x i8>, <4 x i8> *%a 3881 %cvt = uitofp <4 x i8> %ld to <4 x double> 3882 ret <4 x double> %cvt 3883} 3884 3885; 3886; Load Signed Integer to Float 3887; 3888 3889define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { 3890; SSE2-LABEL: sitofp_load_4i64_to_4f32: 3891; SSE2: # %bb.0: 3892; SSE2-NEXT: movdqa (%rdi), %xmm1 3893; SSE2-NEXT: movdqa 16(%rdi), %xmm0 3894; SSE2-NEXT: movq %xmm0, %rax 3895; SSE2-NEXT: cvtsi2ss %rax, %xmm2 3896; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3897; SSE2-NEXT: movq %xmm0, %rax 3898; SSE2-NEXT: xorps %xmm0, %xmm0 3899; SSE2-NEXT: cvtsi2ss %rax, %xmm0 3900; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 3901; SSE2-NEXT: movq %xmm1, %rax 3902; SSE2-NEXT: xorps %xmm0, %xmm0 3903; SSE2-NEXT: cvtsi2ss %rax, %xmm0 3904; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3905; SSE2-NEXT: movq %xmm1, %rax 
3906; SSE2-NEXT: xorps %xmm1, %xmm1 3907; SSE2-NEXT: cvtsi2ss %rax, %xmm1 3908; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3909; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 3910; SSE2-NEXT: retq 3911; 3912; SSE41-LABEL: sitofp_load_4i64_to_4f32: 3913; SSE41: # %bb.0: 3914; SSE41-NEXT: movdqa (%rdi), %xmm0 3915; SSE41-NEXT: movdqa 16(%rdi), %xmm1 3916; SSE41-NEXT: pextrq $1, %xmm0, %rax 3917; SSE41-NEXT: cvtsi2ss %rax, %xmm2 3918; SSE41-NEXT: movq %xmm0, %rax 3919; SSE41-NEXT: xorps %xmm0, %xmm0 3920; SSE41-NEXT: cvtsi2ss %rax, %xmm0 3921; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 3922; SSE41-NEXT: movq %xmm1, %rax 3923; SSE41-NEXT: xorps %xmm2, %xmm2 3924; SSE41-NEXT: cvtsi2ss %rax, %xmm2 3925; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 3926; SSE41-NEXT: pextrq $1, %xmm1, %rax 3927; SSE41-NEXT: xorps %xmm1, %xmm1 3928; SSE41-NEXT: cvtsi2ss %rax, %xmm1 3929; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3930; SSE41-NEXT: retq 3931; 3932; VEX-LABEL: sitofp_load_4i64_to_4f32: 3933; VEX: # %bb.0: 3934; VEX-NEXT: vmovdqa (%rdi), %xmm0 3935; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 3936; VEX-NEXT: vpextrq $1, %xmm0, %rax 3937; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 3938; VEX-NEXT: vmovq %xmm0, %rax 3939; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 3940; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 3941; VEX-NEXT: vmovq %xmm1, %rax 3942; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 3943; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 3944; VEX-NEXT: vpextrq $1, %xmm1, %rax 3945; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 3946; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3947; VEX-NEXT: retq 3948; 3949; AVX512F-LABEL: sitofp_load_4i64_to_4f32: 3950; AVX512F: # %bb.0: 3951; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 3952; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 3953; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 3954; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 3955; AVX512F-NEXT: vmovq 
%xmm0, %rax 3956; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 3957; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 3958; AVX512F-NEXT: vmovq %xmm1, %rax 3959; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 3960; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 3961; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 3962; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 3963; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3964; AVX512F-NEXT: retq 3965; 3966; AVX512VL-LABEL: sitofp_load_4i64_to_4f32: 3967; AVX512VL: # %bb.0: 3968; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 3969; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 3970; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 3971; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 3972; AVX512VL-NEXT: vmovq %xmm0, %rax 3973; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 3974; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 3975; AVX512VL-NEXT: vmovq %xmm1, %rax 3976; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 3977; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 3978; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax 3979; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 3980; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3981; AVX512VL-NEXT: retq 3982; 3983; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32: 3984; AVX512DQ: # %bb.0: 3985; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 3986; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 3987; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 3988; AVX512DQ-NEXT: vzeroupper 3989; AVX512DQ-NEXT: retq 3990; 3991; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32: 3992; AVX512VLDQ: # %bb.0: 3993; AVX512VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0 3994; AVX512VLDQ-NEXT: retq 3995 %ld = load <4 x i64>, <4 x i64> *%a 3996 %cvt = sitofp <4 x i64> %ld to <4 x float> 3997 ret <4 x float> %cvt 3998} 3999 4000define <4 x float> @sitofp_load_4i32_to_4f32(<4 x i32> *%a) { 4001; SSE-LABEL: sitofp_load_4i32_to_4f32: 4002; SSE: # %bb.0: 4003; SSE-NEXT: cvtdq2ps (%rdi), %xmm0 4004; 
SSE-NEXT: retq 4005; 4006; AVX-LABEL: sitofp_load_4i32_to_4f32: 4007; AVX: # %bb.0: 4008; AVX-NEXT: vcvtdq2ps (%rdi), %xmm0 4009; AVX-NEXT: retq 4010 %ld = load <4 x i32>, <4 x i32> *%a 4011 %cvt = sitofp <4 x i32> %ld to <4 x float> 4012 ret <4 x float> %cvt 4013} 4014; Load <4 x i16> from %a and convert it with sitofp to <4 x float>. The expected code sign-extends the elements to 32-bit lanes (punpcklwd followed by psrad $16 on plain sse2, pmovsxwd from memory otherwise) and then converts with (v)cvtdq2ps. 4015define <4 x float> @sitofp_load_4i16_to_4f32(<4 x i16> *%a) { 4016; SSE2-LABEL: sitofp_load_4i16_to_4f32: 4017; SSE2: # %bb.0: 4018; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4019; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4020; SSE2-NEXT: psrad $16, %xmm0 4021; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4022; SSE2-NEXT: retq 4023; 4024; SSE41-LABEL: sitofp_load_4i16_to_4f32: 4025; SSE41: # %bb.0: 4026; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4027; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4028; SSE41-NEXT: retq 4029; 4030; AVX-LABEL: sitofp_load_4i16_to_4f32: 4031; AVX: # %bb.0: 4032; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 4033; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 4034; AVX-NEXT: retq 4035 %ld = load <4 x i16>, <4 x i16> *%a 4036 %cvt = sitofp <4 x i16> %ld to <4 x float> 4037 ret <4 x float> %cvt 4038} 4039; Load <4 x i8> from %a and convert it with sitofp to <4 x float>. The expected code sign-extends the elements to 32-bit lanes (punpcklbw and punpcklwd followed by psrad $24 on plain sse2, pmovsxbd from memory otherwise) and then converts with (v)cvtdq2ps. 4040define <4 x float> @sitofp_load_4i8_to_4f32(<4 x i8> *%a) { 4041; SSE2-LABEL: sitofp_load_4i8_to_4f32: 4042; SSE2: # %bb.0: 4043; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4044; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4045; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4046; SSE2-NEXT: psrad $24, %xmm0 4047; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4048; SSE2-NEXT: retq 4049; 4050; SSE41-LABEL: sitofp_load_4i8_to_4f32: 4051; SSE41: # %bb.0: 4052; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 4053; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4054; SSE41-NEXT: retq 4055; 4056; AVX-LABEL: sitofp_load_4i8_to_4f32: 4057; AVX: # %bb.0: 4058; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 4059; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 4060; AVX-NEXT: retq 4061 %ld = load <4 x i8>, <4 x i8> *%a 4062 %cvt = sitofp <4 x i8> %ld to <4 x float> 4063 ret <4 x float> %cvt 4064} 4065 4066define <8 x 
float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { 4067; SSE2-LABEL: sitofp_load_8i64_to_8f32: 4068; SSE2: # %bb.0: 4069; SSE2-NEXT: movdqa (%rdi), %xmm1 4070; SSE2-NEXT: movdqa 16(%rdi), %xmm0 4071; SSE2-NEXT: movdqa 32(%rdi), %xmm2 4072; SSE2-NEXT: movdqa 48(%rdi), %xmm3 4073; SSE2-NEXT: movq %xmm0, %rax 4074; SSE2-NEXT: cvtsi2ss %rax, %xmm4 4075; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 4076; SSE2-NEXT: movq %xmm0, %rax 4077; SSE2-NEXT: xorps %xmm0, %xmm0 4078; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4079; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] 4080; SSE2-NEXT: movq %xmm1, %rax 4081; SSE2-NEXT: xorps %xmm0, %xmm0 4082; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4083; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 4084; SSE2-NEXT: movq %xmm1, %rax 4085; SSE2-NEXT: xorps %xmm1, %xmm1 4086; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4087; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4088; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] 4089; SSE2-NEXT: movq %xmm3, %rax 4090; SSE2-NEXT: xorps %xmm4, %xmm4 4091; SSE2-NEXT: cvtsi2ss %rax, %xmm4 4092; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 4093; SSE2-NEXT: movq %xmm1, %rax 4094; SSE2-NEXT: xorps %xmm1, %xmm1 4095; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4096; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 4097; SSE2-NEXT: movq %xmm2, %rax 4098; SSE2-NEXT: xorps %xmm1, %xmm1 4099; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4100; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 4101; SSE2-NEXT: movq %xmm2, %rax 4102; SSE2-NEXT: xorps %xmm2, %xmm2 4103; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4104; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4105; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] 4106; SSE2-NEXT: retq 4107; 4108; SSE41-LABEL: sitofp_load_8i64_to_8f32: 4109; SSE41: # %bb.0: 4110; SSE41-NEXT: movdqa (%rdi), %xmm0 4111; SSE41-NEXT: movdqa 16(%rdi), %xmm1 4112; SSE41-NEXT: movdqa 32(%rdi), %xmm2 4113; SSE41-NEXT: movdqa 48(%rdi), %xmm3 4114; 
SSE41-NEXT: pextrq $1, %xmm0, %rax 4115; SSE41-NEXT: cvtsi2ss %rax, %xmm4 4116; SSE41-NEXT: movq %xmm0, %rax 4117; SSE41-NEXT: xorps %xmm0, %xmm0 4118; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4119; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2,3] 4120; SSE41-NEXT: movq %xmm1, %rax 4121; SSE41-NEXT: xorps %xmm4, %xmm4 4122; SSE41-NEXT: cvtsi2ss %rax, %xmm4 4123; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm4[0],xmm0[3] 4124; SSE41-NEXT: pextrq $1, %xmm1, %rax 4125; SSE41-NEXT: xorps %xmm1, %xmm1 4126; SSE41-NEXT: cvtsi2ss %rax, %xmm1 4127; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4128; SSE41-NEXT: pextrq $1, %xmm2, %rax 4129; SSE41-NEXT: xorps %xmm4, %xmm4 4130; SSE41-NEXT: cvtsi2ss %rax, %xmm4 4131; SSE41-NEXT: movq %xmm2, %rax 4132; SSE41-NEXT: xorps %xmm1, %xmm1 4133; SSE41-NEXT: cvtsi2ss %rax, %xmm1 4134; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2,3] 4135; SSE41-NEXT: movq %xmm3, %rax 4136; SSE41-NEXT: xorps %xmm2, %xmm2 4137; SSE41-NEXT: cvtsi2ss %rax, %xmm2 4138; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 4139; SSE41-NEXT: pextrq $1, %xmm3, %rax 4140; SSE41-NEXT: xorps %xmm2, %xmm2 4141; SSE41-NEXT: cvtsi2ss %rax, %xmm2 4142; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 4143; SSE41-NEXT: retq 4144; 4145; VEX-LABEL: sitofp_load_8i64_to_8f32: 4146; VEX: # %bb.0: 4147; VEX-NEXT: vmovaps (%rdi), %xmm0 4148; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 4149; VEX-NEXT: vmovdqa 32(%rdi), %xmm2 4150; VEX-NEXT: vmovdqa 48(%rdi), %xmm3 4151; VEX-NEXT: vpextrq $1, %xmm2, %rax 4152; VEX-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 4153; VEX-NEXT: vmovq %xmm2, %rax 4154; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 4155; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3] 4156; VEX-NEXT: vmovq %xmm3, %rax 4157; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4158; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 4159; VEX-NEXT: vpextrq $1, %xmm3, %rax 4160; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4161; 
VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 4162; VEX-NEXT: vpextrq $1, %xmm0, %rax 4163; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4164; VEX-NEXT: vmovq %xmm0, %rax 4165; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0 4166; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 4167; VEX-NEXT: vmovq %xmm1, %rax 4168; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4169; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 4170; VEX-NEXT: vpextrq $1, %xmm1, %rax 4171; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 4172; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4173; VEX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4174; VEX-NEXT: retq 4175; 4176; AVX512F-LABEL: sitofp_load_8i64_to_8f32: 4177; AVX512F: # %bb.0: 4178; AVX512F-NEXT: vmovaps (%rdi), %xmm0 4179; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 4180; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 4181; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 4182; AVX512F-NEXT: vpextrq $1, %xmm2, %rax 4183; AVX512F-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 4184; AVX512F-NEXT: vmovq %xmm2, %rax 4185; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 4186; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3] 4187; AVX512F-NEXT: vmovq %xmm3, %rax 4188; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4189; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 4190; AVX512F-NEXT: vpextrq $1, %xmm3, %rax 4191; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4192; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 4193; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 4194; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4195; AVX512F-NEXT: vmovq %xmm0, %rax 4196; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0 4197; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 4198; AVX512F-NEXT: vmovq %xmm1, %rax 4199; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4200; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 4201; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 4202; AVX512F-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 4203; AVX512F-NEXT: 
vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4204; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4205; AVX512F-NEXT: retq 4206; 4207; AVX512VL-LABEL: sitofp_load_8i64_to_8f32: 4208; AVX512VL: # %bb.0: 4209; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 4210; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 4211; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 4212; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 4213; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax 4214; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 4215; AVX512VL-NEXT: vmovq %xmm2, %rax 4216; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 4217; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3] 4218; AVX512VL-NEXT: vmovq %xmm3, %rax 4219; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4220; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 4221; AVX512VL-NEXT: vpextrq $1, %xmm3, %rax 4222; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4223; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 4224; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 4225; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4226; AVX512VL-NEXT: vmovq %xmm0, %rax 4227; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0 4228; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 4229; AVX512VL-NEXT: vmovq %xmm1, %rax 4230; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4231; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 4232; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax 4233; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 4234; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4235; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4236; AVX512VL-NEXT: retq 4237; 4238; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32: 4239; AVX512DQ: # %bb.0: 4240; AVX512DQ-NEXT: vcvtqq2ps (%rdi), %ymm0 4241; AVX512DQ-NEXT: retq 4242; 4243; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32: 4244; AVX512VLDQ: # %bb.0: 4245; AVX512VLDQ-NEXT: vcvtqq2ps (%rdi), %ymm0 4246; AVX512VLDQ-NEXT: retq 4247 %ld = load <8 x i64>, <8 x i64> *%a 4248 %cvt = sitofp <8 
x i64> %ld to <8 x float> 4249 ret <8 x float> %cvt 4250} 4251 4252define <8 x float> @sitofp_load_8i32_to_8f32(<8 x i32> *%a) { 4253; SSE-LABEL: sitofp_load_8i32_to_8f32: 4254; SSE: # %bb.0: 4255; SSE-NEXT: cvtdq2ps (%rdi), %xmm0 4256; SSE-NEXT: cvtdq2ps 16(%rdi), %xmm1 4257; SSE-NEXT: retq 4258; 4259; AVX-LABEL: sitofp_load_8i32_to_8f32: 4260; AVX: # %bb.0: 4261; AVX-NEXT: vcvtdq2ps (%rdi), %ymm0 4262; AVX-NEXT: retq 4263 %ld = load <8 x i32>, <8 x i32> *%a 4264 %cvt = sitofp <8 x i32> %ld to <8 x float> 4265 ret <8 x float> %cvt 4266} 4267 4268define <8 x float> @sitofp_load_8i16_to_8f32(<8 x i16> *%a) { 4269; SSE2-LABEL: sitofp_load_8i16_to_8f32: 4270; SSE2: # %bb.0: 4271; SSE2-NEXT: movdqa (%rdi), %xmm1 4272; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4273; SSE2-NEXT: psrad $16, %xmm0 4274; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4275; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 4276; SSE2-NEXT: psrad $16, %xmm1 4277; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 4278; SSE2-NEXT: retq 4279; 4280; SSE41-LABEL: sitofp_load_8i16_to_8f32: 4281; SSE41: # %bb.0: 4282; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 4283; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4284; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4285; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 4286; SSE41-NEXT: retq 4287; 4288; AVX1-LABEL: sitofp_load_8i16_to_8f32: 4289; AVX1: # %bb.0: 4290; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 4291; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 4292; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4293; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 4294; AVX1-NEXT: retq 4295; 4296; AVX2-LABEL: sitofp_load_8i16_to_8f32: 4297; AVX2: # %bb.0: 4298; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 4299; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 4300; AVX2-NEXT: retq 4301; 4302; AVX512-LABEL: sitofp_load_8i16_to_8f32: 4303; AVX512: # %bb.0: 4304; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 4305; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 4306; AVX512-NEXT: retq 4307 %ld = load <8 x i16>, <8 x i16> *%a 4308 %cvt = 
sitofp <8 x i16> %ld to <8 x float> 4309 ret <8 x float> %cvt 4310} 4311 4312define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) { 4313; SSE2-LABEL: sitofp_load_8i8_to_8f32: 4314; SSE2: # %bb.0: 4315; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4316; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4317; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4318; SSE2-NEXT: psrad $24, %xmm0 4319; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4320; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 4321; SSE2-NEXT: psrad $24, %xmm1 4322; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 4323; SSE2-NEXT: retq 4324; 4325; SSE41-LABEL: sitofp_load_8i8_to_8f32: 4326; SSE41: # %bb.0: 4327; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 4328; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 4329; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4330; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 4331; SSE41-NEXT: retq 4332; 4333; AVX1-LABEL: sitofp_load_8i8_to_8f32: 4334; AVX1: # %bb.0: 4335; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm0 4336; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1 4337; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4338; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 4339; AVX1-NEXT: retq 4340; 4341; AVX2-LABEL: sitofp_load_8i8_to_8f32: 4342; AVX2: # %bb.0: 4343; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 4344; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 4345; AVX2-NEXT: retq 4346; 4347; AVX512-LABEL: sitofp_load_8i8_to_8f32: 4348; AVX512: # %bb.0: 4349; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 4350; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 4351; AVX512-NEXT: retq 4352 %ld = load <8 x i8>, <8 x i8> *%a 4353 %cvt = sitofp <8 x i8> %ld to <8 x float> 4354 ret <8 x float> %cvt 4355} 4356 4357; 4358; Load Unsigned Integer to Float 4359; 4360 4361define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { 4362; SSE2-LABEL: uitofp_load_4i64_to_4f32: 4363; SSE2: # %bb.0: 4364; SSE2-NEXT: movdqa 16(%rdi), %xmm0 4365; 
SSE2-NEXT: movq %xmm0, %rax 4366; SSE2-NEXT: testq %rax, %rax 4367; SSE2-NEXT: js .LBB83_1 4368; SSE2-NEXT: # %bb.2: 4369; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4370; SSE2-NEXT: jmp .LBB83_3 4371; SSE2-NEXT: .LBB83_1: 4372; SSE2-NEXT: movq %rax, %rcx 4373; SSE2-NEXT: shrq %rcx 4374; SSE2-NEXT: andl $1, %eax 4375; SSE2-NEXT: orq %rcx, %rax 4376; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4377; SSE2-NEXT: addss %xmm1, %xmm1 4378; SSE2-NEXT: .LBB83_3: 4379; SSE2-NEXT: movdqa (%rdi), %xmm2 4380; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 4381; SSE2-NEXT: movq %xmm0, %rax 4382; SSE2-NEXT: testq %rax, %rax 4383; SSE2-NEXT: js .LBB83_4 4384; SSE2-NEXT: # %bb.5: 4385; SSE2-NEXT: cvtsi2ss %rax, %xmm3 4386; SSE2-NEXT: jmp .LBB83_6 4387; SSE2-NEXT: .LBB83_4: 4388; SSE2-NEXT: movq %rax, %rcx 4389; SSE2-NEXT: shrq %rcx 4390; SSE2-NEXT: andl $1, %eax 4391; SSE2-NEXT: orq %rcx, %rax 4392; SSE2-NEXT: cvtsi2ss %rax, %xmm3 4393; SSE2-NEXT: addss %xmm3, %xmm3 4394; SSE2-NEXT: .LBB83_6: 4395; SSE2-NEXT: movq %xmm2, %rax 4396; SSE2-NEXT: testq %rax, %rax 4397; SSE2-NEXT: js .LBB83_7 4398; SSE2-NEXT: # %bb.8: 4399; SSE2-NEXT: xorps %xmm0, %xmm0 4400; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4401; SSE2-NEXT: jmp .LBB83_9 4402; SSE2-NEXT: .LBB83_7: 4403; SSE2-NEXT: movq %rax, %rcx 4404; SSE2-NEXT: shrq %rcx 4405; SSE2-NEXT: andl $1, %eax 4406; SSE2-NEXT: orq %rcx, %rax 4407; SSE2-NEXT: xorps %xmm0, %xmm0 4408; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4409; SSE2-NEXT: addss %xmm0, %xmm0 4410; SSE2-NEXT: .LBB83_9: 4411; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 4412; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 4413; SSE2-NEXT: movq %xmm2, %rax 4414; SSE2-NEXT: testq %rax, %rax 4415; SSE2-NEXT: js .LBB83_10 4416; SSE2-NEXT: # %bb.11: 4417; SSE2-NEXT: xorps %xmm2, %xmm2 4418; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4419; SSE2-NEXT: jmp .LBB83_12 4420; SSE2-NEXT: .LBB83_10: 4421; SSE2-NEXT: movq %rax, %rcx 4422; SSE2-NEXT: shrq %rcx 4423; SSE2-NEXT: andl $1, %eax 4424; SSE2-NEXT: orq %rcx, %rax 4425; 
SSE2-NEXT: xorps %xmm2, %xmm2 4426; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4427; SSE2-NEXT: addss %xmm2, %xmm2 4428; SSE2-NEXT: .LBB83_12: 4429; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4430; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4431; SSE2-NEXT: retq 4432; 4433; SSE41-LABEL: uitofp_load_4i64_to_4f32: 4434; SSE41: # %bb.0: 4435; SSE41-NEXT: movdqa (%rdi), %xmm1 4436; SSE41-NEXT: movdqa 16(%rdi), %xmm2 4437; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1,1] 4438; SSE41-NEXT: movdqa %xmm1, %xmm0 4439; SSE41-NEXT: pand %xmm4, %xmm0 4440; SSE41-NEXT: movdqa %xmm1, %xmm3 4441; SSE41-NEXT: psrlq $1, %xmm3 4442; SSE41-NEXT: por %xmm0, %xmm3 4443; SSE41-NEXT: movdqa %xmm1, %xmm5 4444; SSE41-NEXT: movdqa %xmm1, %xmm0 4445; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5 4446; SSE41-NEXT: pextrq $1, %xmm5, %rax 4447; SSE41-NEXT: xorps %xmm0, %xmm0 4448; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4449; SSE41-NEXT: movq %xmm5, %rax 4450; SSE41-NEXT: xorps %xmm3, %xmm3 4451; SSE41-NEXT: cvtsi2ss %rax, %xmm3 4452; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3] 4453; SSE41-NEXT: pand %xmm2, %xmm4 4454; SSE41-NEXT: movdqa %xmm2, %xmm5 4455; SSE41-NEXT: psrlq $1, %xmm5 4456; SSE41-NEXT: por %xmm4, %xmm5 4457; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 4458; SSE41-NEXT: movaps %xmm2, %xmm0 4459; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 4460; SSE41-NEXT: movq %xmm2, %rax 4461; SSE41-NEXT: xorps %xmm0, %xmm0 4462; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4463; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3] 4464; SSE41-NEXT: pextrq $1, %xmm2, %rax 4465; SSE41-NEXT: xorps %xmm0, %xmm0 4466; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4467; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0] 4468; SSE41-NEXT: movaps %xmm3, %xmm2 4469; SSE41-NEXT: addps %xmm3, %xmm2 4470; SSE41-NEXT: movaps %xmm1, %xmm0 4471; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3 4472; SSE41-NEXT: movaps %xmm3, %xmm0 4473; SSE41-NEXT: retq 4474; 4475; AVX1-LABEL: 
uitofp_load_4i64_to_4f32: 4476; AVX1: # %bb.0: 4477; AVX1-NEXT: vmovapd (%rdi), %ymm0 4478; AVX1-NEXT: vmovdqa (%rdi), %xmm1 4479; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 4480; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm3 4481; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm4 4482; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 4483; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm4 4484; AVX1-NEXT: vorpd %ymm4, %ymm3, %ymm3 4485; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0 4486; AVX1-NEXT: vpextrq $1, %xmm0, %rax 4487; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4488; AVX1-NEXT: vmovq %xmm0, %rax 4489; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4490; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 4491; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4492; AVX1-NEXT: vmovq %xmm0, %rax 4493; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4494; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 4495; AVX1-NEXT: vpextrq $1, %xmm0, %rax 4496; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0 4497; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] 4498; AVX1-NEXT: vaddps %xmm0, %xmm0, %xmm3 4499; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 4500; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0 4501; AVX1-NEXT: vzeroupper 4502; AVX1-NEXT: retq 4503; 4504; AVX2-LABEL: uitofp_load_4i64_to_4f32: 4505; AVX2: # %bb.0: 4506; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4507; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 4508; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1 4509; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2 4510; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 4511; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 4512; AVX2-NEXT: vpextrq $1, %xmm0, %rax 4513; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 4514; AVX2-NEXT: vmovq %xmm0, %rax 4515; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 4516; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 4517; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 4518; AVX2-NEXT: vmovq %xmm0, %rax 4519; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 4520; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = 
xmm1[0,1],xmm2[0],xmm1[3] 4521; AVX2-NEXT: vpextrq $1, %xmm0, %rax 4522; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 4523; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 4524; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm1 4525; AVX2-NEXT: vmovdqa (%rdi), %xmm2 4526; AVX2-NEXT: vpackssdw 16(%rdi), %xmm2, %xmm2 4527; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 4528; AVX2-NEXT: vzeroupper 4529; AVX2-NEXT: retq 4530; 4531; AVX512F-LABEL: uitofp_load_4i64_to_4f32: 4532; AVX512F: # %bb.0: 4533; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 4534; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 4535; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 4536; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 4537; AVX512F-NEXT: vmovq %xmm0, %rax 4538; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 4539; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 4540; AVX512F-NEXT: vmovq %xmm1, %rax 4541; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 4542; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 4543; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 4544; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1 4545; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4546; AVX512F-NEXT: retq 4547; 4548; AVX512VL-LABEL: uitofp_load_4i64_to_4f32: 4549; AVX512VL: # %bb.0: 4550; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 4551; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 4552; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 4553; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 4554; AVX512VL-NEXT: vmovq %xmm0, %rax 4555; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 4556; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 4557; AVX512VL-NEXT: vmovq %xmm1, %rax 4558; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 4559; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 4560; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax 4561; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1 4562; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4563; AVX512VL-NEXT: retq 4564; 4565; AVX512DQ-LABEL: 
uitofp_load_4i64_to_4f32: 4566; AVX512DQ: # %bb.0: 4567; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 4568; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 4569; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 4570; AVX512DQ-NEXT: vzeroupper 4571; AVX512DQ-NEXT: retq 4572; 4573; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32: 4574; AVX512VLDQ: # %bb.0: 4575; AVX512VLDQ-NEXT: vcvtuqq2psy (%rdi), %xmm0 4576; AVX512VLDQ-NEXT: retq 4577 %ld = load <4 x i64>, <4 x i64> *%a 4578 %cvt = uitofp <4 x i64> %ld to <4 x float> 4579 ret <4 x float> %cvt 4580} 4581 4582define <4 x float> @uitofp_load_4i32_to_4f32(<4 x i32> *%a) { 4583; SSE2-LABEL: uitofp_load_4i32_to_4f32: 4584; SSE2: # %bb.0: 4585; SSE2-NEXT: movdqa (%rdi), %xmm0 4586; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 4587; SSE2-NEXT: pand %xmm0, %xmm1 4588; SSE2-NEXT: por {{.*}}(%rip), %xmm1 4589; SSE2-NEXT: psrld $16, %xmm0 4590; SSE2-NEXT: por {{.*}}(%rip), %xmm0 4591; SSE2-NEXT: subps {{.*}}(%rip), %xmm0 4592; SSE2-NEXT: addps %xmm1, %xmm0 4593; SSE2-NEXT: retq 4594; 4595; SSE41-LABEL: uitofp_load_4i32_to_4f32: 4596; SSE41: # %bb.0: 4597; SSE41-NEXT: movdqa (%rdi), %xmm0 4598; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 4599; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 4600; SSE41-NEXT: psrld $16, %xmm0 4601; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 4602; SSE41-NEXT: subps {{.*}}(%rip), %xmm0 4603; SSE41-NEXT: addps %xmm1, %xmm0 4604; SSE41-NEXT: retq 4605; 4606; AVX1-LABEL: uitofp_load_4i32_to_4f32: 4607; AVX1: # %bb.0: 4608; AVX1-NEXT: vmovdqa (%rdi), %xmm0 4609; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 4610; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 4611; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 4612; AVX1-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 4613; 
AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 4614; AVX1-NEXT: retq 4615; 4616; AVX2-LABEL: uitofp_load_4i32_to_4f32: 4617; AVX2: # %bb.0: 4618; AVX2-NEXT: vmovdqa (%rdi), %xmm0 4619; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 4620; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 4621; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 4622; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928] 4623; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 4624; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 4625; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0 4626; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 4627; AVX2-NEXT: retq 4628; 4629; AVX512F-LABEL: uitofp_load_4i32_to_4f32: 4630; AVX512F: # %bb.0: 4631; AVX512F-NEXT: vmovaps (%rdi), %xmm0 4632; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 4633; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4634; AVX512F-NEXT: vzeroupper 4635; AVX512F-NEXT: retq 4636; 4637; AVX512VL-LABEL: uitofp_load_4i32_to_4f32: 4638; AVX512VL: # %bb.0: 4639; AVX512VL-NEXT: vcvtudq2ps (%rdi), %xmm0 4640; AVX512VL-NEXT: retq 4641; 4642; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32: 4643; AVX512DQ: # %bb.0: 4644; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 4645; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 4646; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4647; AVX512DQ-NEXT: vzeroupper 4648; AVX512DQ-NEXT: retq 4649; 4650; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32: 4651; AVX512VLDQ: # %bb.0: 4652; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %xmm0 4653; AVX512VLDQ-NEXT: retq 4654 %ld = load <4 x i32>, <4 x i32> *%a 4655 %cvt = uitofp <4 x i32> %ld to <4 x float> 4656 ret <4 x float> %cvt 4657} 4658 4659define <4 x float> @uitofp_load_4i16_to_4f32(<4 x i16> *%a) { 4660; SSE2-LABEL: uitofp_load_4i16_to_4f32: 4661; SSE2: # %bb.0: 4662; SSE2-NEXT: movq {{.*#+}} 
xmm0 = mem[0],zero 4663; SSE2-NEXT: pxor %xmm1, %xmm1 4664; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4665; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4666; SSE2-NEXT: retq 4667; 4668; SSE41-LABEL: uitofp_load_4i16_to_4f32: 4669; SSE41: # %bb.0: 4670; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4671; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4672; SSE41-NEXT: retq 4673; 4674; AVX-LABEL: uitofp_load_4i16_to_4f32: 4675; AVX: # %bb.0: 4676; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4677; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 4678; AVX-NEXT: retq 4679 %ld = load <4 x i16>, <4 x i16> *%a 4680 %cvt = uitofp <4 x i16> %ld to <4 x float> 4681 ret <4 x float> %cvt 4682} 4683 4684define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) { 4685; SSE2-LABEL: uitofp_load_4i8_to_4f32: 4686; SSE2: # %bb.0: 4687; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4688; SSE2-NEXT: pxor %xmm1, %xmm1 4689; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 4690; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4691; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4692; SSE2-NEXT: retq 4693; 4694; SSE41-LABEL: uitofp_load_4i8_to_4f32: 4695; SSE41: # %bb.0: 4696; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 4697; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4698; SSE41-NEXT: retq 4699; 4700; AVX-LABEL: uitofp_load_4i8_to_4f32: 4701; AVX: # %bb.0: 4702; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 4703; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 4704; AVX-NEXT: retq 4705 %ld = load <4 x i8>, <4 x i8> *%a 4706 %cvt = uitofp <4 x i8> %ld to <4 x float> 4707 ret <4 x float> %cvt 4708} 
4709 4710define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { 4711; SSE2-LABEL: uitofp_load_8i64_to_8f32: 4712; SSE2: # %bb.0: 4713; SSE2-NEXT: movdqa 16(%rdi), %xmm0 4714; SSE2-NEXT: movq %xmm0, %rax 4715; SSE2-NEXT: testq %rax, %rax 4716; SSE2-NEXT: js .LBB87_1 4717; SSE2-NEXT: # %bb.2: 4718; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4719; SSE2-NEXT: jmp .LBB87_3 4720; SSE2-NEXT: .LBB87_1: 4721; SSE2-NEXT: movq %rax, %rcx 4722; SSE2-NEXT: shrq %rcx 4723; SSE2-NEXT: andl $1, %eax 4724; SSE2-NEXT: orq %rcx, %rax 4725; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4726; SSE2-NEXT: addss %xmm2, %xmm2 4727; SSE2-NEXT: .LBB87_3: 4728; SSE2-NEXT: movdqa (%rdi), %xmm3 4729; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 4730; SSE2-NEXT: movq %xmm0, %rax 4731; SSE2-NEXT: testq %rax, %rax 4732; SSE2-NEXT: js .LBB87_4 4733; SSE2-NEXT: # %bb.5: 4734; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4735; SSE2-NEXT: jmp .LBB87_6 4736; SSE2-NEXT: .LBB87_4: 4737; SSE2-NEXT: movq %rax, %rcx 4738; SSE2-NEXT: shrq %rcx 4739; SSE2-NEXT: andl $1, %eax 4740; SSE2-NEXT: orq %rcx, %rax 4741; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4742; SSE2-NEXT: addss %xmm1, %xmm1 4743; SSE2-NEXT: .LBB87_6: 4744; SSE2-NEXT: movq %xmm3, %rax 4745; SSE2-NEXT: testq %rax, %rax 4746; SSE2-NEXT: js .LBB87_7 4747; SSE2-NEXT: # %bb.8: 4748; SSE2-NEXT: xorps %xmm0, %xmm0 4749; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4750; SSE2-NEXT: jmp .LBB87_9 4751; SSE2-NEXT: .LBB87_7: 4752; SSE2-NEXT: movq %rax, %rcx 4753; SSE2-NEXT: shrq %rcx 4754; SSE2-NEXT: andl $1, %eax 4755; SSE2-NEXT: orq %rcx, %rax 4756; SSE2-NEXT: xorps %xmm0, %xmm0 4757; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4758; SSE2-NEXT: addss %xmm0, %xmm0 4759; SSE2-NEXT: .LBB87_9: 4760; SSE2-NEXT: movdqa 48(%rdi), %xmm6 4761; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] 4762; SSE2-NEXT: movq %xmm3, %rax 4763; SSE2-NEXT: testq %rax, %rax 4764; SSE2-NEXT: js .LBB87_10 4765; SSE2-NEXT: # %bb.11: 4766; SSE2-NEXT: cvtsi2ss %rax, %xmm4 4767; SSE2-NEXT: jmp .LBB87_12 4768; SSE2-NEXT: .LBB87_10: 4769; SSE2-NEXT: 
movq %rax, %rcx 4770; SSE2-NEXT: shrq %rcx 4771; SSE2-NEXT: andl $1, %eax 4772; SSE2-NEXT: orq %rcx, %rax 4773; SSE2-NEXT: cvtsi2ss %rax, %xmm4 4774; SSE2-NEXT: addss %xmm4, %xmm4 4775; SSE2-NEXT: .LBB87_12: 4776; SSE2-NEXT: movq %xmm6, %rax 4777; SSE2-NEXT: testq %rax, %rax 4778; SSE2-NEXT: js .LBB87_13 4779; SSE2-NEXT: # %bb.14: 4780; SSE2-NEXT: xorps %xmm3, %xmm3 4781; SSE2-NEXT: cvtsi2ss %rax, %xmm3 4782; SSE2-NEXT: jmp .LBB87_15 4783; SSE2-NEXT: .LBB87_13: 4784; SSE2-NEXT: movq %rax, %rcx 4785; SSE2-NEXT: shrq %rcx 4786; SSE2-NEXT: andl $1, %eax 4787; SSE2-NEXT: orq %rcx, %rax 4788; SSE2-NEXT: xorps %xmm3, %xmm3 4789; SSE2-NEXT: cvtsi2ss %rax, %xmm3 4790; SSE2-NEXT: addss %xmm3, %xmm3 4791; SSE2-NEXT: .LBB87_15: 4792; SSE2-NEXT: movdqa 32(%rdi), %xmm5 4793; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[2,3,2,3] 4794; SSE2-NEXT: movq %xmm6, %rax 4795; SSE2-NEXT: testq %rax, %rax 4796; SSE2-NEXT: js .LBB87_16 4797; SSE2-NEXT: # %bb.17: 4798; SSE2-NEXT: xorps %xmm6, %xmm6 4799; SSE2-NEXT: cvtsi2ss %rax, %xmm6 4800; SSE2-NEXT: jmp .LBB87_18 4801; SSE2-NEXT: .LBB87_16: 4802; SSE2-NEXT: movq %rax, %rcx 4803; SSE2-NEXT: shrq %rcx 4804; SSE2-NEXT: andl $1, %eax 4805; SSE2-NEXT: orq %rcx, %rax 4806; SSE2-NEXT: xorps %xmm6, %xmm6 4807; SSE2-NEXT: cvtsi2ss %rax, %xmm6 4808; SSE2-NEXT: addss %xmm6, %xmm6 4809; SSE2-NEXT: .LBB87_18: 4810; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 4811; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 4812; SSE2-NEXT: movq %xmm5, %rax 4813; SSE2-NEXT: testq %rax, %rax 4814; SSE2-NEXT: js .LBB87_19 4815; SSE2-NEXT: # %bb.20: 4816; SSE2-NEXT: xorps %xmm1, %xmm1 4817; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4818; SSE2-NEXT: jmp .LBB87_21 4819; SSE2-NEXT: .LBB87_19: 4820; SSE2-NEXT: movq %rax, %rcx 4821; SSE2-NEXT: shrq %rcx 4822; SSE2-NEXT: andl $1, %eax 4823; SSE2-NEXT: orq %rcx, %rax 4824; SSE2-NEXT: xorps %xmm1, %xmm1 4825; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4826; SSE2-NEXT: addss %xmm1, %xmm1 4827; SSE2-NEXT: 
.LBB87_21: 4828; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 4829; SSE2-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1] 4830; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,2,3] 4831; SSE2-NEXT: movq %xmm2, %rax 4832; SSE2-NEXT: testq %rax, %rax 4833; SSE2-NEXT: js .LBB87_22 4834; SSE2-NEXT: # %bb.23: 4835; SSE2-NEXT: xorps %xmm2, %xmm2 4836; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4837; SSE2-NEXT: jmp .LBB87_24 4838; SSE2-NEXT: .LBB87_22: 4839; SSE2-NEXT: movq %rax, %rcx 4840; SSE2-NEXT: shrq %rcx 4841; SSE2-NEXT: andl $1, %eax 4842; SSE2-NEXT: orq %rcx, %rax 4843; SSE2-NEXT: xorps %xmm2, %xmm2 4844; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4845; SSE2-NEXT: addss %xmm2, %xmm2 4846; SSE2-NEXT: .LBB87_24: 4847; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4848; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0] 4849; SSE2-NEXT: retq 4850; 4851; SSE41-LABEL: uitofp_load_8i64_to_8f32: 4852; SSE41: # %bb.0: 4853; SSE41-NEXT: movdqa (%rdi), %xmm4 4854; SSE41-NEXT: movdqa 16(%rdi), %xmm5 4855; SSE41-NEXT: movdqa 32(%rdi), %xmm6 4856; SSE41-NEXT: movdqa 48(%rdi), %xmm2 4857; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [1,1] 4858; SSE41-NEXT: movdqa %xmm4, %xmm0 4859; SSE41-NEXT: pand %xmm7, %xmm0 4860; SSE41-NEXT: movdqa %xmm4, %xmm1 4861; SSE41-NEXT: psrlq $1, %xmm1 4862; SSE41-NEXT: por %xmm0, %xmm1 4863; SSE41-NEXT: movdqa %xmm4, %xmm3 4864; SSE41-NEXT: movdqa %xmm4, %xmm0 4865; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 4866; SSE41-NEXT: pextrq $1, %xmm3, %rax 4867; SSE41-NEXT: xorps %xmm0, %xmm0 4868; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4869; SSE41-NEXT: movq %xmm3, %rax 4870; SSE41-NEXT: xorps %xmm3, %xmm3 4871; SSE41-NEXT: cvtsi2ss %rax, %xmm3 4872; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3] 4873; SSE41-NEXT: movdqa %xmm5, %xmm0 4874; SSE41-NEXT: pand %xmm7, %xmm0 4875; SSE41-NEXT: movdqa %xmm5, %xmm1 4876; SSE41-NEXT: psrlq $1, %xmm1 4877; SSE41-NEXT: por %xmm0, %xmm1 4878; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 4879; 
SSE41-NEXT: movaps %xmm5, %xmm0 4880; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 4881; SSE41-NEXT: movq %xmm5, %rax 4882; SSE41-NEXT: xorps %xmm0, %xmm0 4883; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4884; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3] 4885; SSE41-NEXT: pextrq $1, %xmm5, %rax 4886; SSE41-NEXT: xorps %xmm0, %xmm0 4887; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4888; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0] 4889; SSE41-NEXT: movaps %xmm3, %xmm1 4890; SSE41-NEXT: addps %xmm3, %xmm1 4891; SSE41-NEXT: movaps %xmm4, %xmm0 4892; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3 4893; SSE41-NEXT: movdqa %xmm6, %xmm0 4894; SSE41-NEXT: pand %xmm7, %xmm0 4895; SSE41-NEXT: movdqa %xmm6, %xmm1 4896; SSE41-NEXT: psrlq $1, %xmm1 4897; SSE41-NEXT: por %xmm0, %xmm1 4898; SSE41-NEXT: movdqa %xmm6, %xmm4 4899; SSE41-NEXT: movdqa %xmm6, %xmm0 4900; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 4901; SSE41-NEXT: pextrq $1, %xmm4, %rax 4902; SSE41-NEXT: xorps %xmm0, %xmm0 4903; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4904; SSE41-NEXT: movq %xmm4, %rax 4905; SSE41-NEXT: xorps %xmm1, %xmm1 4906; SSE41-NEXT: cvtsi2ss %rax, %xmm1 4907; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3] 4908; SSE41-NEXT: pand %xmm2, %xmm7 4909; SSE41-NEXT: movdqa %xmm2, %xmm4 4910; SSE41-NEXT: psrlq $1, %xmm4 4911; SSE41-NEXT: por %xmm7, %xmm4 4912; SSE41-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,3],xmm2[1,3] 4913; SSE41-NEXT: movaps %xmm2, %xmm0 4914; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 4915; SSE41-NEXT: movq %xmm2, %rax 4916; SSE41-NEXT: xorps %xmm0, %xmm0 4917; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4918; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3] 4919; SSE41-NEXT: pextrq $1, %xmm2, %rax 4920; SSE41-NEXT: xorps %xmm0, %xmm0 4921; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4922; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0] 4923; SSE41-NEXT: movaps %xmm1, %xmm2 4924; SSE41-NEXT: addps %xmm1, %xmm2 4925; SSE41-NEXT: movaps %xmm6, %xmm0 4926; SSE41-NEXT: blendvps %xmm0, 
%xmm2, %xmm1 4927; SSE41-NEXT: movaps %xmm3, %xmm0 4928; SSE41-NEXT: retq 4929; 4930; AVX1-LABEL: uitofp_load_8i64_to_8f32: 4931; AVX1: # %bb.0: 4932; AVX1-NEXT: vmovapd (%rdi), %ymm2 4933; AVX1-NEXT: vmovapd 32(%rdi), %ymm3 4934; AVX1-NEXT: vmovapd {{.*#+}} ymm8 = [1,1,1,1] 4935; AVX1-NEXT: vandpd %ymm3, %ymm8, %ymm5 4936; AVX1-NEXT: vmovdqa (%rdi), %xmm9 4937; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 4938; AVX1-NEXT: vmovdqa 32(%rdi), %xmm6 4939; AVX1-NEXT: vpsrlq $1, %xmm6, %xmm7 4940; AVX1-NEXT: vmovdqa 48(%rdi), %xmm4 4941; AVX1-NEXT: vpsrlq $1, %xmm4, %xmm0 4942; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm7, %ymm0 4943; AVX1-NEXT: vorpd %ymm5, %ymm0, %ymm0 4944; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm3, %ymm0 4945; AVX1-NEXT: vpextrq $1, %xmm0, %rax 4946; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm3 4947; AVX1-NEXT: vmovq %xmm0, %rax 4948; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm5 4949; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3] 4950; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 4951; AVX1-NEXT: vmovq %xmm0, %rax 4952; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm5 4953; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3] 4954; AVX1-NEXT: vpextrq $1, %xmm0, %rax 4955; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm0 4956; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] 4957; AVX1-NEXT: vaddps %xmm0, %xmm0, %xmm3 4958; AVX1-NEXT: vpackssdw %xmm4, %xmm6, %xmm4 4959; AVX1-NEXT: vblendvps %xmm4, %xmm3, %xmm0, %xmm0 4960; AVX1-NEXT: vandpd %ymm2, %ymm8, %ymm3 4961; AVX1-NEXT: vpsrlq $1, %xmm9, %xmm4 4962; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm5 4963; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4 4964; AVX1-NEXT: vorpd %ymm3, %ymm4, %ymm3 4965; AVX1-NEXT: vblendvpd %ymm2, %ymm3, %ymm2, %ymm2 4966; AVX1-NEXT: vpextrq $1, %xmm2, %rax 4967; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm3 4968; AVX1-NEXT: vmovq %xmm2, %rax 4969; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm4 4970; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 4971; AVX1-NEXT: vextractf128 $1, 
%ymm2, %xmm2 4972; AVX1-NEXT: vmovq %xmm2, %rax 4973; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm4 4974; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 4975; AVX1-NEXT: vpextrq $1, %xmm2, %rax 4976; AVX1-NEXT: vcvtsi2ss %rax, %xmm10, %xmm2 4977; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0] 4978; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm3 4979; AVX1-NEXT: vpackssdw %xmm1, %xmm9, %xmm1 4980; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm2, %xmm1 4981; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4982; AVX1-NEXT: retq 4983; 4984; AVX2-LABEL: uitofp_load_8i64_to_8f32: 4985; AVX2: # %bb.0: 4986; AVX2-NEXT: vmovaps (%rdi), %ymm0 4987; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 4988; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] 4989; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3 4990; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4 4991; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3 4992; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm1 4993; AVX2-NEXT: vpextrq $1, %xmm1, %rax 4994; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 4995; AVX2-NEXT: vmovq %xmm1, %rax 4996; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4997; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 4998; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 4999; AVX2-NEXT: vmovq %xmm1, %rax 5000; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 5001; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 5002; AVX2-NEXT: vpextrq $1, %xmm1, %rax 5003; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 5004; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 5005; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm3 5006; AVX2-NEXT: vmovdqa (%rdi), %xmm4 5007; AVX2-NEXT: vmovdqa 32(%rdi), %xmm5 5008; AVX2-NEXT: vpackssdw 48(%rdi), %xmm5, %xmm5 5009; AVX2-NEXT: vblendvps %xmm5, %xmm3, %xmm1, %xmm1 5010; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm2 5011; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3 5012; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2 5013; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0 5014; AVX2-NEXT: vpextrq $1, %xmm0, %rax 5015; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, 
%xmm2 5016; AVX2-NEXT: vmovq %xmm0, %rax 5017; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3 5018; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 5019; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 5020; AVX2-NEXT: vmovq %xmm0, %rax 5021; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3 5022; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 5023; AVX2-NEXT: vpextrq $1, %xmm0, %rax 5024; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm0 5025; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] 5026; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm2 5027; AVX2-NEXT: vpackssdw 16(%rdi), %xmm4, %xmm3 5028; AVX2-NEXT: vblendvps %xmm3, %xmm2, %xmm0, %xmm0 5029; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5030; AVX2-NEXT: retq 5031; 5032; AVX512F-LABEL: uitofp_load_8i64_to_8f32: 5033; AVX512F: # %bb.0: 5034; AVX512F-NEXT: vmovaps (%rdi), %xmm0 5035; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 5036; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 5037; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 5038; AVX512F-NEXT: vpextrq $1, %xmm2, %rax 5039; AVX512F-NEXT: vcvtusi2ss %rax, %xmm4, %xmm4 5040; AVX512F-NEXT: vmovq %xmm2, %rax 5041; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2 5042; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3] 5043; AVX512F-NEXT: vmovq %xmm3, %rax 5044; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 5045; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 5046; AVX512F-NEXT: vpextrq $1, %xmm3, %rax 5047; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 5048; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 5049; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 5050; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 5051; AVX512F-NEXT: vmovq %xmm0, %rax 5052; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm0 5053; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 5054; AVX512F-NEXT: vmovq %xmm1, %rax 5055; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 5056; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 5057; AVX512F-NEXT: vpextrq $1, 
%xmm1, %rax 5058; AVX512F-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1 5059; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 5060; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5061; AVX512F-NEXT: retq 5062; 5063; AVX512VL-LABEL: uitofp_load_8i64_to_8f32: 5064; AVX512VL: # %bb.0: 5065; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 5066; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 5067; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 5068; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 5069; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax 5070; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm4, %xmm4 5071; AVX512VL-NEXT: vmovq %xmm2, %rax 5072; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2 5073; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3] 5074; AVX512VL-NEXT: vmovq %xmm3, %rax 5075; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 5076; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 5077; AVX512VL-NEXT: vpextrq $1, %xmm3, %rax 5078; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 5079; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 5080; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 5081; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 5082; AVX512VL-NEXT: vmovq %xmm0, %rax 5083; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm0 5084; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] 5085; AVX512VL-NEXT: vmovq %xmm1, %rax 5086; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 5087; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 5088; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax 5089; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1 5090; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 5091; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5092; AVX512VL-NEXT: retq 5093; 5094; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32: 5095; AVX512DQ: # %bb.0: 5096; AVX512DQ-NEXT: vcvtuqq2ps (%rdi), %ymm0 5097; AVX512DQ-NEXT: retq 5098; 5099; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32: 5100; AVX512VLDQ: # %bb.0: 5101; AVX512VLDQ-NEXT: vcvtuqq2ps (%rdi), 
%ymm0 5102; AVX512VLDQ-NEXT: retq 5103 %ld = load <8 x i64>, <8 x i64> *%a 5104 %cvt = uitofp <8 x i64> %ld to <8 x float> 5105 ret <8 x float> %cvt 5106} 5107 5108define <8 x float> @uitofp_load_8i32_to_8f32(<8 x i32> *%a) { 5109; SSE2-LABEL: uitofp_load_8i32_to_8f32: 5110; SSE2: # %bb.0: 5111; SSE2-NEXT: movdqa (%rdi), %xmm0 5112; SSE2-NEXT: movdqa 16(%rdi), %xmm1 5113; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] 5114; SSE2-NEXT: movdqa %xmm0, %xmm3 5115; SSE2-NEXT: pand %xmm2, %xmm3 5116; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200] 5117; SSE2-NEXT: por %xmm4, %xmm3 5118; SSE2-NEXT: psrld $16, %xmm0 5119; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928] 5120; SSE2-NEXT: por %xmm5, %xmm0 5121; SSE2-NEXT: movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 5122; SSE2-NEXT: subps %xmm6, %xmm0 5123; SSE2-NEXT: addps %xmm3, %xmm0 5124; SSE2-NEXT: pand %xmm1, %xmm2 5125; SSE2-NEXT: por %xmm4, %xmm2 5126; SSE2-NEXT: psrld $16, %xmm1 5127; SSE2-NEXT: por %xmm5, %xmm1 5128; SSE2-NEXT: subps %xmm6, %xmm1 5129; SSE2-NEXT: addps %xmm2, %xmm1 5130; SSE2-NEXT: retq 5131; 5132; SSE41-LABEL: uitofp_load_8i32_to_8f32: 5133; SSE41: # %bb.0: 5134; SSE41-NEXT: movdqa (%rdi), %xmm0 5135; SSE41-NEXT: movdqa 16(%rdi), %xmm1 5136; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] 5137; SSE41-NEXT: movdqa %xmm0, %xmm3 5138; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 5139; SSE41-NEXT: psrld $16, %xmm0 5140; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928] 5141; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] 5142; SSE41-NEXT: movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 5143; SSE41-NEXT: subps %xmm5, %xmm0 5144; SSE41-NEXT: addps %xmm3, %xmm0 5145; SSE41-NEXT: 
pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 5146; SSE41-NEXT: psrld $16, %xmm1 5147; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] 5148; SSE41-NEXT: subps %xmm5, %xmm1 5149; SSE41-NEXT: addps %xmm2, %xmm1 5150; SSE41-NEXT: retq 5151; 5152; AVX1-LABEL: uitofp_load_8i32_to_8f32: 5153; AVX1: # %bb.0: 5154; AVX1-NEXT: vmovaps (%rdi), %ymm0 5155; AVX1-NEXT: vmovdqa (%rdi), %xmm1 5156; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 5157; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1 5158; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 5159; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 5160; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 5161; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 5162; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 5163; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 5164; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 5165; AVX1-NEXT: retq 5166; 5167; AVX2-LABEL: uitofp_load_8i32_to_8f32: 5168; AVX2: # %bb.0: 5169; AVX2-NEXT: vmovdqa (%rdi), %ymm0 5170; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] 5171; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 5172; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 5173; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] 5174; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] 5175; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 5176; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 5177; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 5178; AVX2-NEXT: retq 5179; 5180; AVX512F-LABEL: 
uitofp_load_8i32_to_8f32: 5181; AVX512F: # %bb.0: 5182; AVX512F-NEXT: vmovaps (%rdi), %ymm0 5183; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 5184; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 5185; AVX512F-NEXT: retq 5186; 5187; AVX512VL-LABEL: uitofp_load_8i32_to_8f32: 5188; AVX512VL: # %bb.0: 5189; AVX512VL-NEXT: vcvtudq2ps (%rdi), %ymm0 5190; AVX512VL-NEXT: retq 5191; 5192; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32: 5193; AVX512DQ: # %bb.0: 5194; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 5195; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 5196; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 5197; AVX512DQ-NEXT: retq 5198; 5199; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32: 5200; AVX512VLDQ: # %bb.0: 5201; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %ymm0 5202; AVX512VLDQ-NEXT: retq 5203 %ld = load <8 x i32>, <8 x i32> *%a 5204 %cvt = uitofp <8 x i32> %ld to <8 x float> 5205 ret <8 x float> %cvt 5206} 5207 5208define <8 x float> @uitofp_load_8i16_to_8f32(<8 x i16> *%a) { 5209; SSE2-LABEL: uitofp_load_8i16_to_8f32: 5210; SSE2: # %bb.0: 5211; SSE2-NEXT: movdqa (%rdi), %xmm1 5212; SSE2-NEXT: pxor %xmm2, %xmm2 5213; SSE2-NEXT: movdqa %xmm1, %xmm0 5214; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 5215; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 5216; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 5217; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 5218; SSE2-NEXT: retq 5219; 5220; SSE41-LABEL: uitofp_load_8i16_to_8f32: 5221; SSE41: # %bb.0: 5222; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 5223; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 5224; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 5225; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 5226; SSE41-NEXT: retq 5227; 5228; AVX1-LABEL: uitofp_load_8i16_to_8f32: 5229; AVX1: # %bb.0: 5230; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 5231; 
AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 5232; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5233; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 5234; AVX1-NEXT: retq 5235; 5236; AVX2-LABEL: uitofp_load_8i16_to_8f32: 5237; AVX2: # %bb.0: 5238; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 5239; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 5240; AVX2-NEXT: retq 5241; 5242; AVX512-LABEL: uitofp_load_8i16_to_8f32: 5243; AVX512: # %bb.0: 5244; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 5245; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 5246; AVX512-NEXT: retq 5247 %ld = load <8 x i16>, <8 x i16> *%a 5248 %cvt = uitofp <8 x i16> %ld to <8 x float> 5249 ret <8 x float> %cvt 5250} 5251 5252define <8 x float> @uitofp_load_8i8_to_8f32(<8 x i8> *%a) { 5253; SSE2-LABEL: uitofp_load_8i8_to_8f32: 5254; SSE2: # %bb.0: 5255; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 5256; SSE2-NEXT: pxor %xmm2, %xmm2 5257; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 5258; SSE2-NEXT: movdqa %xmm1, %xmm0 5259; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 5260; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 5261; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 5262; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 5263; SSE2-NEXT: retq 5264; 5265; SSE41-LABEL: uitofp_load_8i8_to_8f32: 5266; SSE41: # %bb.0: 5267; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 5268; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 5269; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 5270; 
SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 5271; SSE41-NEXT: retq 5272; 5273; AVX1-LABEL: uitofp_load_8i8_to_8f32: 5274; AVX1: # %bb.0: 5275; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 5276; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 5277; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5278; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 5279; AVX1-NEXT: retq 5280; 5281; AVX2-LABEL: uitofp_load_8i8_to_8f32: 5282; AVX2: # %bb.0: 5283; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 5284; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 5285; AVX2-NEXT: retq 5286; 5287; AVX512-LABEL: uitofp_load_8i8_to_8f32: 5288; AVX512: # %bb.0: 5289; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 5290; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 5291; AVX512-NEXT: retq 5292 %ld = load <8 x i8>, <8 x i8> *%a 5293 %cvt = uitofp <8 x i8> %ld to <8 x float> 5294 ret <8 x float> %cvt 5295} 5296 5297; 5298; Aggregates 5299; 5300 5301%Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }> 5302define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) { 5303; SSE2-LABEL: aggregate_sitofp_8i16_to_8f32: 5304; SSE2: # %bb.0: 5305; SSE2-NEXT: movq 24(%rdi), %rax 5306; SSE2-NEXT: movdqu 8(%rdi), %xmm0 5307; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 5308; SSE2-NEXT: psrad $16, %xmm1 5309; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 5310; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 5311; SSE2-NEXT: psrad $16, %xmm0 5312; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 5313; 
SSE2-NEXT: movaps %xmm0, 16(%rax) 5314; SSE2-NEXT: movaps %xmm1, (%rax) 5315; SSE2-NEXT: retq 5316; 5317; SSE41-LABEL: aggregate_sitofp_8i16_to_8f32: 5318; SSE41: # %bb.0: 5319; SSE41-NEXT: movq 24(%rdi), %rax 5320; SSE41-NEXT: pmovsxwd 16(%rdi), %xmm0 5321; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 5322; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 5323; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 5324; SSE41-NEXT: movaps %xmm0, 16(%rax) 5325; SSE41-NEXT: movaps %xmm1, (%rax) 5326; SSE41-NEXT: retq 5327; 5328; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32: 5329; AVX1: # %bb.0: 5330; AVX1-NEXT: movq 24(%rdi), %rax 5331; AVX1-NEXT: vpmovsxwd 16(%rdi), %xmm0 5332; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1 5333; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 5334; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 5335; AVX1-NEXT: vmovaps %ymm0, (%rax) 5336; AVX1-NEXT: vzeroupper 5337; AVX1-NEXT: retq 5338; 5339; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32: 5340; AVX2: # %bb.0: 5341; AVX2-NEXT: movq 24(%rdi), %rax 5342; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0 5343; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 5344; AVX2-NEXT: vmovaps %ymm0, (%rax) 5345; AVX2-NEXT: vzeroupper 5346; AVX2-NEXT: retq 5347; 5348; AVX512-LABEL: aggregate_sitofp_8i16_to_8f32: 5349; AVX512: # %bb.0: 5350; AVX512-NEXT: movq 24(%rdi), %rax 5351; AVX512-NEXT: vpmovsxwd 8(%rdi), %ymm0 5352; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 5353; AVX512-NEXT: vmovaps %ymm0, (%rax) 5354; AVX512-NEXT: vzeroupper 5355; AVX512-NEXT: retq 5356 %1 = load %Arguments, %Arguments* %a0, align 1 5357 %2 = extractvalue %Arguments %1, 1 5358 %3 = extractvalue %Arguments %1, 2 5359 %4 = sitofp <8 x i16> %2 to <8 x float> 5360 store <8 x float> %4, <8 x float>* %3, align 32 5361 ret void 5362} 5363 5364define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind { 5365; SSE-LABEL: sitofp_i32_to_2f64: 5366; SSE: # %bb.0: 5367; SSE-NEXT: cvtsi2sd %edi, %xmm0 5368; SSE-NEXT: retq 5369; 5370; AVX-LABEL: sitofp_i32_to_2f64: 5371; AVX: # %bb.0: 5372; AVX-NEXT: vcvtsi2sd %edi, %xmm0, 
%xmm0 5373; AVX-NEXT: retq 5374 %cvt = sitofp i32 %a1 to double 5375 %res = insertelement <2 x double> %a0, double %cvt, i32 0 5376 ret <2 x double> %res 5377} 5378 5379define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind { 5380; SSE-LABEL: sitofp_i32_to_4f32: 5381; SSE: # %bb.0: 5382; SSE-NEXT: cvtsi2ss %edi, %xmm0 5383; SSE-NEXT: retq 5384; 5385; AVX-LABEL: sitofp_i32_to_4f32: 5386; AVX: # %bb.0: 5387; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 5388; AVX-NEXT: retq 5389 %cvt = sitofp i32 %a1 to float 5390 %res = insertelement <4 x float> %a0, float %cvt, i32 0 5391 ret <4 x float> %res 5392} 5393 5394define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind { 5395; SSE-LABEL: sitofp_i64_to_2f64: 5396; SSE: # %bb.0: 5397; SSE-NEXT: cvtsi2sd %rdi, %xmm0 5398; SSE-NEXT: retq 5399; 5400; AVX-LABEL: sitofp_i64_to_2f64: 5401; AVX: # %bb.0: 5402; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 5403; AVX-NEXT: retq 5404 %cvt = sitofp i64 %a1 to double 5405 %res = insertelement <2 x double> %a0, double %cvt, i32 0 5406 ret <2 x double> %res 5407} 5408 5409define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind { 5410; SSE-LABEL: sitofp_i64_to_4f32: 5411; SSE: # %bb.0: 5412; SSE-NEXT: cvtsi2ss %rdi, %xmm0 5413; SSE-NEXT: retq 5414; 5415; AVX-LABEL: sitofp_i64_to_4f32: 5416; AVX: # %bb.0: 5417; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 5418; AVX-NEXT: retq 5419 %cvt = sitofp i64 %a1 to float 5420 %res = insertelement <4 x float> %a0, float %cvt, i32 0 5421 ret <4 x float> %res 5422} 5423 5424; Extract from int vector and convert to FP. 

; Converts lane 0 of a <4 x i32> to float with sitofp. Every checked target is
; expected to emit one packed (v)cvtdq2ps on the whole register, with no move
; of the element to a general-purpose register.
define float @extract0_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = sitofp i32 %e to float
  ret float %r
}

; Same lane-0 conversion, but the extracted integer has a second use, as it is
; incremented and converted again to form the divisor. The checks expect the
; packed convert for the first use plus a (v)movd / incl / (v)cvtsi2ss sequence
; for the incremented value.
; NOTE(review) - the "f32i" in the function name looks like a typo for "f32_"
; (compare the multiuse2 test that follows); confirm before renaming, since the
; label checks would have to be regenerated.
define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: incl %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm1
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: incl %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %f = sitofp i32 %e to float
  %e1 = add i32 %e, 1
  %f1 = sitofp i32 %e1 to float
  %r = fdiv float %f, %f1
  ret float %r
}

; Lane-0 conversion where the extracted integer is also stored through %p.
; The checks expect the packed convert to write a scratch register and the
; store to be done straight from the vector register with (v)movss.
define float @extract0_sitofp_v4i32_f32_multiuse2(<4 x i32> %x, i32* %p) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
; SSE-NEXT: movss %xmm0, (%rdi)
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm1
; AVX-NEXT: vmovss %xmm0, (%rdi)
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = sitofp i32 %e to float
  store i32 %e, i32* %p
  ret float %r
}

; Converts lane 0 of a <4 x i32> to double with sitofp. The SSE checks expect
; the scalar route (movd to a GPR, xorps of the destination, cvtsi2sd), while
; the AVX checks expect a single packed vcvtdq2pd.
define double @extract0_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f64:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = sitofp i32 %e to double
  ret double %r
}

; Converts lane 0 of a <4 x i32> to float with uitofp. Without AVX-512 the
; checks expect a move to %eax followed by the 64-bit register form of
; (v)cvtsi2ss on %rax. The AVX512F and AVX512DQ runs (no VL) expect vcvtudq2ps
; on zmm registers followed by vzeroupper; the two VL runs expect the xmm form.
define float @extract0_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_uitofp_v4i32_f32:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: extract0_uitofp_v4i32_f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovd %xmm0, %eax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract0_uitofp_v4i32_f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract0_uitofp_v4i32_f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract0_uitofp_v4i32_f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = uitofp i32 %e to float
  ret float %r
}

; Converts lane 0 of a <4 x i32> to double with uitofp. Without AVX-512 the
; checks expect a move to %eax followed by (v)cvtsi2sd on %rax. The AVX512F and
; AVX512DQ runs (no VL) expect vcvtudq2pd from ymm to zmm followed by
; vzeroupper; the two VL runs expect the xmm form.
define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_uitofp_v4i32_f64:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: extract0_uitofp_v4i32_f64:
; VEX: # %bb.0:
; VEX-NEXT: vmovd %xmm0, %eax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract0_uitofp_v4i32_f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract0_uitofp_v4i32_f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract0_uitofp_v4i32_f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = uitofp i32 %e to double
  ret double %r
}

; Extract an element at a non-zero index from an int vector and convert to FP.

; Converts lane 3 of a <4 x i32> to float with sitofp. The checks expect lane 3
; to be splatted first (pshufd or vpermilps with [3,3,3,3]) and then converted
; with the packed (v)cvtdq2ps.
define float @extract3_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE-LABEL: extract3_sitofp_v4i32_f32:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract3_sitofp_v4i32_f32:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = sitofp i32 %e to float
  ret float %r
}

; Converts lane 3 of a <4 x i32> to double with sitofp. The SSE2 checks expect
; pshufd + movd + cvtsi2sd, the SSE4.1 checks expect extractps $3 straight to
; %eax before cvtsi2sd, and the AVX checks expect vpermilps + packed vcvtdq2pd.
define double @extract3_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE2-LABEL: extract3_sitofp_v4i32_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %eax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_sitofp_v4i32_f64:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: extract3_sitofp_v4i32_f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = sitofp i32 %e to double
  ret double %r
}

; Converts lane 3 of a <4 x i32> to float with uitofp. Without AVX-512 the
; checks expect the lane moved to %eax (pshufd + movd, extractps or
; vextractps) and converted with (v)cvtsi2ss on %rax. All AVX-512 runs expect
; vpermilps [3,3,3,3] followed by vcvtudq2ps, on zmm (plus vzeroupper) without
; VL and on xmm with VL.
define float @extract3_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE2-LABEL: extract3_uitofp_v4i32_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_uitofp_v4i32_f32:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: extract3_uitofp_v4i32_f32:
; VEX: # %bb.0:
; VEX-NEXT: vextractps $3, %xmm0, %eax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = uitofp i32 %e to float
  ret float %r
}

; Converts lane 3 of a <4 x i32> to double with uitofp. Without AVX-512 the
; checks expect the lane moved to %eax and converted with (v)cvtsi2sd on %rax.
; All AVX-512 runs expect vpermilps [3,3,3,3] followed by vcvtudq2pd, from ymm
; to zmm (plus vzeroupper) without VL and on xmm with VL.
define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE2-LABEL: extract3_uitofp_v4i32_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_uitofp_v4i32_f64:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: extract3_uitofp_v4i32_f64:
; VEX: # %bb.0:
; VEX-NEXT: vextractps $3, %xmm0, %eax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = uitofp i32 %e to double
  ret double %r
}

; Presumably a regression test for the bug report the function is named after
; (PR43609) - confirm against the bug tracker.
; The IR converts %y and %y + <2, 2> from <2 x i64> to <2 x double> with uitofp,
; adds 0.5 to each result using "fadd fast", and stores the two vectors at
; element offsets 0 and 2 of %x with 8-byte alignment. The function uses
; attribute group #0 ("unsafe-fp-math"="true").
; Runs without AVX512DQ are expected to expand each conversion inline: the low
; and high 32-bit halves are combined with integer constants (por), the high
; part is adjusted with (v)subpd, and the two parts are summed with (v)addpd.
; The AVX512DQ and AVX512VLDQ runs are expected to use vcvtuqq2pd instead.
define void @PR43609(double* nocapture %x, <2 x i64> %y) #0 {
; SSE2-LABEL: PR43609:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,2]
; SSE2-NEXT: paddq %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: psrlq $32, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT: subpd %xmm6, %xmm0
; SSE2-NEXT: addpd %xmm3, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: por %xmm4, %xmm2
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: subpd %xmm6, %xmm1
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; SSE2-NEXT: addpd %xmm2, %xmm0
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movupd %xmm0, (%rdi)
; SSE2-NEXT: movupd %xmm1, 16(%rdi)
; SSE2-NEXT: retq
;
; SSE41-LABEL: PR43609:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2,2]
; SSE41-NEXT: paddq %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE41-NEXT: por %xmm4, %xmm3
; SSE41-NEXT: psrlq $32, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE41-NEXT: subpd %xmm6, %xmm0
; SSE41-NEXT: addpd %xmm3, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm4, %xmm2
; SSE41-NEXT: psrlq $32, %xmm1
; SSE41-NEXT: por %xmm5, %xmm1
; SSE41-NEXT: subpd %xmm6, %xmm1
; SSE41-NEXT: addpd %xmm2, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; SSE41-NEXT: addpd %xmm2, %xmm0
; SSE41-NEXT: addpd %xmm2, %xmm1
; SSE41-NEXT: movupd %xmm0, (%rdi)
; SSE41-NEXT: movupd %xmm1, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: PR43609:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX1-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX1-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX1-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX1-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovupd %xmm0, (%rdi)
; AVX1-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR43609:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX2-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX2-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX2-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX2-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vmovupd %xmm0, (%rdi)
; AVX2-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: PR43609:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512F-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512F-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX512F-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512F-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512F-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX512F-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX512F-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX512F-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512F-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vmovupd %xmm0, (%rdi)
; AVX512F-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: PR43609:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512VL-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512VL-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX512VL-NEXT: vmovapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512VL-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX512VL-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX512VL-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX512VL-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX512VL-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512VL-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vmovupd %xmm0, (%rdi)
; AVX512VL-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: PR43609:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm1, %zmm1
; AVX512DQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512DQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512DQ-NEXT: vmovupd %xmm0, (%rdi)
; AVX512DQ-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: PR43609:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm1, %xmm1
; AVX512VLDQ-NEXT: vmovapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512VLDQ-NEXT: vmovupd %xmm0, (%rdi)
; AVX512VLDQ-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512VLDQ-NEXT: retq
  %step.add.epil = add <2 x i64> %y, <i64 2, i64 2>
  %t20 = uitofp <2 x i64> %y to <2 x double>
  %t21 = uitofp <2 x i64> %step.add.epil to <2 x double>
  %t22 = fadd fast <2 x double> %t20, <double 5.0e-01, double 5.0e-01>
  %t23 = fadd fast <2 x double> %t21, <double 5.0e-01, double 5.0e-01>
  %t24 = getelementptr inbounds double, double* %x, i64 0
  %t25 = bitcast double* %t24 to <2 x double>*
  store <2 x double> %t22, <2 x double>* %t25, align 8
  %t26 = getelementptr inbounds double, double* %t24, i64 2
  %t27 = bitcast double* %t26 to <2 x double>*
  store <2 x double> %t23, <2 x double>* %t27, align 8
  ret void
}

; Attribute group applied to @PR43609 through its "#0" reference.
attributes #0 = { "unsafe-fp-math"="true" }
