1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
3; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
4; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX
6
7; PR11674
; fpext_frommem - load a <2 x float> from %in, extend it to <2 x double>,
; and store the result to %out.
; The assertions below expect, for every configuration, a single
; (v)cvtps2pd that takes its source directly from memory, followed by an
; unaligned (v)movups store of the 16-byte result.
8define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
9; X32-SSE-LABEL: fpext_frommem:
10; X32-SSE:       # BB#0: # %entry
11; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
12; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
13; X32-SSE-NEXT:    cvtps2pd (%ecx), %xmm0
14; X32-SSE-NEXT:    movups %xmm0, (%eax)
15; X32-SSE-NEXT:    retl
16;
17; X32-AVX-LABEL: fpext_frommem:
18; X32-AVX:       # BB#0: # %entry
19; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
20; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
21; X32-AVX-NEXT:    vcvtps2pd (%ecx), %xmm0
22; X32-AVX-NEXT:    vmovups %xmm0, (%eax)
23; X32-AVX-NEXT:    retl
24;
25; X64-SSE-LABEL: fpext_frommem:
26; X64-SSE:       # BB#0: # %entry
27; X64-SSE-NEXT:    cvtps2pd (%rdi), %xmm0
28; X64-SSE-NEXT:    movups %xmm0, (%rsi)
29; X64-SSE-NEXT:    retq
30;
31; X64-AVX-LABEL: fpext_frommem:
32; X64-AVX:       # BB#0: # %entry
33; X64-AVX-NEXT:    vcvtps2pd (%rdi), %xmm0
34; X64-AVX-NEXT:    vmovups %xmm0, (%rsi)
35; X64-AVX-NEXT:    retq
36entry:
  ; The load is marked align 8 (the size of two floats).
37  %0 = load <2 x float>, <2 x float>* %in, align 8
38  %1 = fpext <2 x float> %0 to <2 x double>
  ; The store is only align 1, which is why an unaligned store is expected.
39  store <2 x double> %1, <2 x double>* %out, align 1
40  ret void
41}
42
; fpext_frommem4 - load a <4 x float> from %in, extend it to <4 x double>,
; and store the result to %out.
; Expected code per the assertions below:
;   - SSE4.1 configurations split the work into two cvtps2pd conversions
;     (source offsets 0 and 8) and two 16-byte movups stores
;     (destination offsets 0 and 16).
;   - AVX configurations use one vcvtps2pd into a ymm register, one 32-byte
;     vmovups store, and a vzeroupper before returning.
43define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
44; X32-SSE-LABEL: fpext_frommem4:
45; X32-SSE:       # BB#0: # %entry
46; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
47; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
48; X32-SSE-NEXT:    cvtps2pd (%ecx), %xmm0
49; X32-SSE-NEXT:    cvtps2pd 8(%ecx), %xmm1
50; X32-SSE-NEXT:    movups %xmm1, 16(%eax)
51; X32-SSE-NEXT:    movups %xmm0, (%eax)
52; X32-SSE-NEXT:    retl
53;
54; X32-AVX-LABEL: fpext_frommem4:
55; X32-AVX:       # BB#0: # %entry
56; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
57; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
58; X32-AVX-NEXT:    vcvtps2pd (%ecx), %ymm0
59; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
60; X32-AVX-NEXT:    vzeroupper
61; X32-AVX-NEXT:    retl
62;
63; X64-SSE-LABEL: fpext_frommem4:
64; X64-SSE:       # BB#0: # %entry
65; X64-SSE-NEXT:    cvtps2pd (%rdi), %xmm0
66; X64-SSE-NEXT:    cvtps2pd 8(%rdi), %xmm1
67; X64-SSE-NEXT:    movups %xmm1, 16(%rsi)
68; X64-SSE-NEXT:    movups %xmm0, (%rsi)
69; X64-SSE-NEXT:    retq
70;
71; X64-AVX-LABEL: fpext_frommem4:
72; X64-AVX:       # BB#0: # %entry
73; X64-AVX-NEXT:    vcvtps2pd (%rdi), %ymm0
74; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
75; X64-AVX-NEXT:    vzeroupper
76; X64-AVX-NEXT:    retq
77entry:
  ; No explicit alignment is given on the load.
78  %0 = load <4 x float>, <4 x float>* %in
79  %1 = fpext <4 x float> %0 to <4 x double>
  ; The store is only align 1, which is why unaligned stores are expected.
80  store <4 x double> %1, <4 x double>* %out, align 1
81  ret void
82}
83
; fpext_frommem8 - load an <8 x float> from %in, extend it to <8 x double>,
; and store the result to %out.
; Expected code per the assertions below:
;   - SSE4.1 configurations use four cvtps2pd conversions (source offsets
;     0, 8, 16, 24) and four 16-byte movups stores (destination offsets
;     0, 16, 32, 48).
;   - AVX configurations use two vcvtps2pd conversions into ymm registers
;     (source offsets 0 and 16), two 32-byte vmovups stores (destination
;     offsets 0 and 32), and a vzeroupper before returning.
84define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
85; X32-SSE-LABEL: fpext_frommem8:
86; X32-SSE:       # BB#0: # %entry
87; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
88; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
89; X32-SSE-NEXT:    cvtps2pd (%ecx), %xmm0
90; X32-SSE-NEXT:    cvtps2pd 8(%ecx), %xmm1
91; X32-SSE-NEXT:    cvtps2pd 16(%ecx), %xmm2
92; X32-SSE-NEXT:    cvtps2pd 24(%ecx), %xmm3
93; X32-SSE-NEXT:    movups %xmm3, 48(%eax)
94; X32-SSE-NEXT:    movups %xmm2, 32(%eax)
95; X32-SSE-NEXT:    movups %xmm1, 16(%eax)
96; X32-SSE-NEXT:    movups %xmm0, (%eax)
97; X32-SSE-NEXT:    retl
98;
99; X32-AVX-LABEL: fpext_frommem8:
100; X32-AVX:       # BB#0: # %entry
101; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
102; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
103; X32-AVX-NEXT:    vcvtps2pd (%ecx), %ymm0
104; X32-AVX-NEXT:    vcvtps2pd 16(%ecx), %ymm1
105; X32-AVX-NEXT:    vmovups %ymm1, 32(%eax)
106; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
107; X32-AVX-NEXT:    vzeroupper
108; X32-AVX-NEXT:    retl
109;
110; X64-SSE-LABEL: fpext_frommem8:
111; X64-SSE:       # BB#0: # %entry
112; X64-SSE-NEXT:    cvtps2pd (%rdi), %xmm0
113; X64-SSE-NEXT:    cvtps2pd 8(%rdi), %xmm1
114; X64-SSE-NEXT:    cvtps2pd 16(%rdi), %xmm2
115; X64-SSE-NEXT:    cvtps2pd 24(%rdi), %xmm3
116; X64-SSE-NEXT:    movups %xmm3, 48(%rsi)
117; X64-SSE-NEXT:    movups %xmm2, 32(%rsi)
118; X64-SSE-NEXT:    movups %xmm1, 16(%rsi)
119; X64-SSE-NEXT:    movups %xmm0, (%rsi)
120; X64-SSE-NEXT:    retq
121;
122; X64-AVX-LABEL: fpext_frommem8:
123; X64-AVX:       # BB#0: # %entry
124; X64-AVX-NEXT:    vcvtps2pd (%rdi), %ymm0
125; X64-AVX-NEXT:    vcvtps2pd 16(%rdi), %ymm1
126; X64-AVX-NEXT:    vmovups %ymm1, 32(%rsi)
127; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
128; X64-AVX-NEXT:    vzeroupper
129; X64-AVX-NEXT:    retq
130entry:
  ; No explicit alignment is given on the load.
131  %0 = load <8 x float>, <8 x float>* %in
132  %1 = fpext <8 x float> %0 to <8 x double>
  ; The store is only align 1, which is why unaligned stores are expected.
133  store <8 x double> %1, <8 x double>* %out, align 1
134  ret void
135}
136
; fpext_fromconst - build the vector <1.0, -2.0> as <2 x float> via two
; insertelement operations, extend it to <2 x double>, and return it.
; The assertions below expect every configuration to emit only a (v)movaps
; of the already-extended constant [1.0, -2.0] followed by the return, with
; no conversion instruction in the output.
137define <2 x double> @fpext_fromconst() {
138; X32-SSE-LABEL: fpext_fromconst:
139; X32-SSE:       # BB#0: # %entry
140; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
141; X32-SSE-NEXT:    retl
142;
143; X32-AVX-LABEL: fpext_fromconst:
144; X32-AVX:       # BB#0: # %entry
145; X32-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
146; X32-AVX-NEXT:    retl
147;
148; X64-SSE-LABEL: fpext_fromconst:
149; X64-SSE:       # BB#0: # %entry
150; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
151; X64-SSE-NEXT:    retq
152;
153; X64-AVX-LABEL: fpext_fromconst:
154; X64-AVX:       # BB#0: # %entry
155; X64-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
156; X64-AVX-NEXT:    retq
157entry:
  ; Lane 0 = 1.0, lane 1 = -2.0; both lanes are defined before the extend.
158  %0  = insertelement <2 x float> undef, float 1.0, i32 0
159  %1  = insertelement <2 x float> %0, float -2.0, i32 1
160  %2  = fpext <2 x float> %1 to <2 x double>
161  ret <2 x double> %2
162}
163