; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    extractps $1, %xmm0, 4(%eax)
; X32-SSE-NEXT:    movss %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vextractps $1, %xmm0, 4(%eax)
; X32-AVX-NEXT:    vmovss %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vmovupd %ymm0, (%eax)
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX-NEXT:    vmovupd %ymm0, (%rsi)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

; FIXME: For exact truncations we should be able to fold this.
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    cvtpd2ps .LCPI3_0, %xmm1
; X32-SSE-NEXT:    cvtpd2ps .LCPI3_1, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy .LCPI3_0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0 = insertelement <4 x double> undef, double 1.0, i32 0
  %1 = insertelement <4 x double> %0, double -2.0, i32 1
  %2 = insertelement <4 x double> %1, double +4.0, i32 2
  %3 = insertelement <4 x double> %2, double -0.0, i32 3
  %4 = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}