1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
4
; Signed <8 x i32> -> <8 x float>: both llc configurations are expected to
; emit a single 256-bit vcvtdq2ps.
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
; CHECK-LABEL: sitofp00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %b = sitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %b
}

; <8 x float> -> signed <8 x i32>: both llc configurations are expected to
; emit a single 256-bit truncating convert (vcvttps2dq).
define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
; CHECK-LABEL: fptosi00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT:    retq
  %b = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %b
}

; Signed <4 x i32> -> <4 x double>: expected to widen directly from an xmm
; source to a ymm destination with one vcvtdq2pd in both configurations.
define <4 x double> @sitofp01(<4 x i32> %a) {
; CHECK-LABEL: sitofp01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %b = sitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %b
}

; Signed <8 x i16> -> <8 x float>. The two configurations differ in how the
; i16 lanes are sign-extended to i32 before the vcvtdq2ps:
;   * with +avx the low and high halves are extended separately (vpshufd moves
;     the upper four elements down) and rejoined with vinsertf128;
;   * with +avx512f a single vpmovsxwd from xmm to ymm is expected.
define <8 x float> @sitofp02(<8 x i16> %a) {
; AVX-LABEL: sitofp02:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: sitofp02:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT:    retq
  %b = sitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %b
}

; <4 x double> -> signed <4 x i32>: expected to narrow from ymm to xmm with one
; truncating vcvttpd2dq, followed by vzeroupper before the return because the
; function consumed a ymm register but returns an xmm value.
define <4 x i32> @fptosi01(<4 x double> %a) {
; CHECK-LABEL: fptosi01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %b = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; <8 x double> -> <8 x float> truncation:
;   * with +avx the argument arrives in two ymm registers; each half is
;     converted with vcvtpd2ps and the results are joined with vinsertf128;
;   * with +avx512f a single vcvtpd2ps from zmm to ymm is expected.
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
; AVX-LABEL: fptrunc00:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtpd2ps %ymm0, %xmm0
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: fptrunc00:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtpd2ps %zmm0, %ymm0
; AVX512-NEXT:    retq
  %a = fptrunc <8 x double> %b to <8 x float>
  ret <8 x float> %a
}

; Scalar fptrunc of element 0 of %a0, inserted into element 0 of %a1.
; The extract / convert / insert sequence is expected to collapse into one
; three-operand vcvtsd2ss that takes its pass-through lanes from %a1 (xmm1).
define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fptrunc01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %ext = extractelement <2 x double> %a0, i32 0
  %cvt = fptrunc double %ext to float
  %res = insertelement <4 x float> %a1, float %cvt, i32 0
  ret <4 x float> %res
}

; <4 x float> -> <4 x double> extension: expected to be a single vcvtps2pd
; from an xmm source to a ymm destination in both configurations.
define <4 x double> @fpext00(<4 x float> %b) nounwind {
; CHECK-LABEL: fpext00:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %a = fpext <4 x float> %b to <4 x double>
  ret <4 x double> %a
}

; Scalar fpext of element 0 of %a1, inserted into element 0 of %a0.
; The extract / convert / insert sequence is expected to collapse into one
; three-operand vcvtss2sd that takes its pass-through lane from %a0 (xmm0).
define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fpext01:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

; i64 loaded from memory -> double: the load is expected to be folded into the
; memory-operand form of vcvtsi2sdq (64-bit integer source).
define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcA:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to double
  ret double %conv
}

; i32 loaded from memory -> double: the load is expected to be folded into the
; memory-operand form of vcvtsi2sdl (32-bit integer source).
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcB:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to double
  ret double %conv
}

; i32 loaded from memory -> float: the load is expected to be folded into the
; memory-operand form of vcvtsi2ssl (32-bit integer source).
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcC:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to float
  ret float %conv
}

; i64 loaded from memory -> float: the load is expected to be folded into the
; memory-operand form of vcvtsi2ssq (64-bit integer source).
define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcD:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to double
  ret double %conv
}

; Scalar float -> double through stack slots. The expected sequence keeps the
; load as a separate vmovss, converts register-to-register with vcvtss2sd, and
; stores the result to an rsp-relative slot with vmovsd.
; %f is read without ever being written; only the selected instructions matter
; to this test, not the loaded value.
define void @fpext() nounwind uwtable {
; CHECK-LABEL: fpext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    retq
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load float, float* %f, align 4
  %conv = fpext float %tmp to double
  store double %conv, double* %d, align 8
  ret void
}

; llvm.nearbyint on a double register argument: expected to select vroundsd
; with immediate 12 (0b1100: round using the current MXCSR mode, precision
; exception suppressed).
define double @nearbyint_f64(double %a) {
; CHECK-LABEL: nearbyint_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call double @llvm.nearbyint.f64(double %a)
  ret double %res
}
; Intrinsic declaration; also used by the nearbyint_f64_load* tests in this file.
declare double @llvm.nearbyint.f64(double %p)

; llvm.floor on a float register argument: expected to select vroundss with
; immediate 9 (0b1001: round toward negative infinity, precision exception
; suppressed).
define float @floor_f32(float %a) {
; CHECK-LABEL: floor_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call float @llvm.floor.f32(float %a)
  ret float %res
}
; Intrinsic declaration; also used by the floor_f32_load* tests in this file.
declare float @llvm.floor.f32(float %p)

; llvm.floor on a float loaded from memory, in a function marked optsize:
; the load is expected to be folded into the memory operand of vroundss $9
; rather than emitted as a separate vmovss.
define float @floor_f32_load(float* %aptr) optsize {
; CHECK-LABEL: floor_f32_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a = load float, float* %aptr
  %res = call float @llvm.floor.f32(float %a)
  ret float %res
}

; Same body as floor_f32_load, but without the optsize attribute: instead the
; function carries !prof !14 (function_entry_count 0) alongside the module's
; ProfileSummary. The expected code is the same folded-load vroundss $9 as the
; optsize variant (the "pgso" suffix presumably stands for profile-guided size
; optimization).
define float @floor_f32_load_pgso(float* %aptr) !prof !14 {
; CHECK-LABEL: floor_f32_load_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a = load float, float* %aptr
  %res = call float @llvm.floor.f32(float %a)
  ret float %res
}

; llvm.nearbyint on a double loaded from memory, in a function marked optsize:
; the load is expected to be folded into the memory operand of vroundsd $12
; rather than emitted as a separate vmovsd.
define double @nearbyint_f64_load(double* %aptr) optsize {
; CHECK-LABEL: nearbyint_f64_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a = load double, double* %aptr
  %res = call double @llvm.nearbyint.f64(double %a)
  ret double %res
}

; Same body as nearbyint_f64_load, but without the optsize attribute: instead
; the function carries !prof !14 (function_entry_count 0) alongside the
; module's ProfileSummary. The expected code is the same folded-load
; vroundsd $12 as the optsize variant (the "pgso" suffix presumably stands for
; profile-guided size optimization).
define double @nearbyint_f64_load_pgso(double* %aptr) !prof !14 {
; CHECK-LABEL: nearbyint_f64_load_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a = load double, double* %aptr
  %res = call double @llvm.nearbyint.f64(double %a)
  ret double %res
}

; Synthetic profile data for the *_pgso tests above.
; !0 installs a "ProfileSummary" module flag (instrumentation-profile format)
; whose fields are !2 through !9; !14 is the per-function entry count of 0
; attached to the *_pgso functions via !prof.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
