; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX
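; Tests lowering of vector fptrunc (double -> float): <2 x double>, <4 x double>
; and <8 x double> values loaded from memory with the narrowed results stored
; unaligned, plus a <4 x double> built entirely from constants, on 32-bit and
; 64-bit targets with SSE4.1 and with AVX.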

define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    extractps $1, %xmm0, 4(%eax)
; X32-SSE-NEXT:    movss %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vextractps $1, %xmm0, 4(%eax)
; X32-AVX-NEXT:    vmovss %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vmovupd %ymm0, (%eax)
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX-NEXT:    vmovupd %ymm0, (%rsi)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

; FIXME: For exact truncations we should be able to fold this.
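; All four elements below (1.0, -2.0, 4.0, -0.0) are exactly representable as
; floats, so a folded version would simply return the constant. A sketch of the
; IR such a fold might produce (hypothetical; the fold is not implemented yet):
;   ret <4 x float> <float 1.000000e+00, float -2.000000e+00, float 4.000000e+00, float -0.000000e+00>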
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    cvtpd2ps .LCPI3_0, %xmm1
; X32-SSE-NEXT:    cvtpd2ps .LCPI3_1, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy .LCPI3_0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0  = insertelement <4 x double> undef, double 1.0, i32 0
  %1  = insertelement <4 x double> %0, double -2.0, i32 1
  %2  = insertelement <4 x double> %1, double +4.0, i32 2
  %3  = insertelement <4 x double> %2, double -0.0, i32 3
  %4  = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}