1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
3
4
; Legacy SSE2 scalar double -> signed i64 conversion intrinsic, compiled with
; the +avx512f feature set from the llc invocation at the top of this file.
; The expected assembly is a single vcvtsd2si from %xmm0 into %rax.
5define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
6; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
7; CHECK:       ## %bb.0:
8; CHECK-NEXT:    vcvtsd2si %xmm0, %rax
9; CHECK-NEXT:    retq
10  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
11  ret i64 %res
12}
13declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
14
; Legacy SSE2 signed i64 -> scalar double conversion intrinsic. The integer
; argument arrives in %rdi and the vector argument in %xmm0; the expected
; assembly is a single three-operand vcvtsi2sd writing back to %xmm0.
15define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
16; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
17; CHECK:       ## %bb.0:
18; CHECK-NEXT:    vcvtsi2sd %rdi, %xmm0, %xmm0
19; CHECK-NEXT:    retq
20  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
21  ret <2 x double> %res
22}
23declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
24
; Truncating scalar double -> signed i64 conversion with an explicit i32
; control operand. The intrinsic is called twice on the same input: once with
; i32 4 (expected assembly has no modifier) and once with i32 8 (expected
; assembly carries {sae}). The two results are added so both conversions
; feed the return value.
25define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
26; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
27; CHECK:       ## %bb.0:
28; CHECK-NEXT:    vcvttsd2si %xmm0, %rcx
29; CHECK-NEXT:    vcvttsd2si {sae}, %xmm0, %rax
30; CHECK-NEXT:    addq %rcx, %rax
31; CHECK-NEXT:    retq
32  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ; i32 4 -> plain form, no {sae}
33  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ; i32 8 -> {sae} form
34  %res2 = add i64 %res0, %res1
35  ret i64 %res2
36}
37declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
38
; Truncating scalar double -> unsigned i64 conversion (vcvttsd2usi). Same
; shape as the signed variant: one call with i32 4 (no modifier expected) and
; one with i32 8 ({sae} expected), with the two i64 results summed.
39define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
40; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
41; CHECK:       ## %bb.0:
42; CHECK-NEXT:    vcvttsd2usi %xmm0, %rcx
43; CHECK-NEXT:    vcvttsd2usi {sae}, %xmm0, %rax
44; CHECK-NEXT:    addq %rcx, %rax
45; CHECK-NEXT:    retq
46  %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ; i32 4 -> plain form, no {sae}
47  %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ; i32 8 -> {sae} form
48  %res2 = add i64 %res0, %res1
49  ret i64 %res2
50}
51declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
52
; Legacy SSE scalar float -> signed i64 conversion intrinsic. The expected
; assembly is a single vcvtss2si from %xmm0 into %rax.
53define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
54; CHECK-LABEL: test_x86_sse_cvtss2si64:
55; CHECK:       ## %bb.0:
56; CHECK-NEXT:    vcvtss2si %xmm0, %rax
57; CHECK-NEXT:    retq
58  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
59  ret i64 %res
60}
61declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
62
63
; Legacy SSE signed i64 -> scalar float conversion intrinsic. The integer
; argument arrives in %rdi and the vector argument in %xmm0; the expected
; assembly is a single three-operand vcvtsi2ss writing back to %xmm0.
64define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
65; CHECK-LABEL: test_x86_sse_cvtsi642ss:
66; CHECK:       ## %bb.0:
67; CHECK-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
68; CHECK-NEXT:    retq
69  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
70  ret <4 x float> %res
71}
72declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
73
74
; Truncating scalar float -> signed i64 conversion with an explicit i32
; control operand. Called once with i32 4 (no modifier expected) and once
; with i32 8 ({sae} expected); the two i64 results are summed and returned.
75define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
76; CHECK-LABEL: test_x86_avx512_cvttss2si64:
77; CHECK:       ## %bb.0:
78; CHECK-NEXT:    vcvttss2si %xmm0, %rcx
79; CHECK-NEXT:    vcvttss2si {sae}, %xmm0, %rax
80; CHECK-NEXT:    addq %rcx, %rax
81; CHECK-NEXT:    retq
82  %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ; i32 4 -> plain form, no {sae}
83  %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ; i32 8 -> {sae} form
84  %res2 = add i64 %res0, %res1
85  ret i64 %res2
86}
87declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
88
; Truncating scalar float -> unsigned i32 conversion (vcvttss2usi). Unlike
; the neighbouring tests this one returns i32, so the expected assembly uses
; the 32-bit registers %ecx/%eax and addl. The call order is also reversed
; here: the i32 8 ({sae}) call comes first and the i32 4 (plain) call second,
; which is why the {sae} instruction appears first in the expected assembly.
89define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
90; CHECK-LABEL: test_x86_avx512_cvttss2usi:
91; CHECK:       ## %bb.0:
92; CHECK-NEXT:    vcvttss2usi {sae}, %xmm0, %ecx
93; CHECK-NEXT:    vcvttss2usi %xmm0, %eax
94; CHECK-NEXT:    addl %ecx, %eax
95; CHECK-NEXT:    retq
96  %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ; i32 8 -> {sae} form
97  %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ; i32 4 -> plain form, no {sae}
98  %res2 = add i32 %res0, %res1
99  ret i32 %res2
100}
101declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
102
; Truncating scalar float -> unsigned i64 conversion (vcvttss2usi with a
; 64-bit destination). Called once with i32 4 (no modifier expected) and once
; with i32 8 ({sae} expected); the two i64 results are summed and returned.
103define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
104; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
105; CHECK:       ## %bb.0:
106; CHECK-NEXT:    vcvttss2usi %xmm0, %rcx
107; CHECK-NEXT:    vcvttss2usi {sae}, %xmm0, %rax
108; CHECK-NEXT:    addq %rcx, %rax
109; CHECK-NEXT:    retq
110  %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ; i32 4 -> plain form, no {sae}
111  %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ; i32 8 -> {sae} form
112  %res2 = add i64 %res0, %res1
113  ret i64 %res2
114}
115declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
116
; Rounding scalar double -> unsigned i64 conversion (vcvtsd2usi) exercised
; with three values of the i32 rounding operand on the same input:
;   i32 4  -> plain instruction, no embedded-rounding modifier
;   i32 11 -> {rz-sae}
;   i32 9  -> {rd-sae}
; The three i64 results are chained through two adds so that every
; conversion contributes to the returned value.
117define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
118; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
119; CHECK:       ## %bb.0:
120; CHECK-NEXT:    vcvtsd2usi %xmm0, %rax
121; CHECK-NEXT:    vcvtsd2usi {rz-sae}, %xmm0, %rcx
122; CHECK-NEXT:    addq %rax, %rcx
123; CHECK-NEXT:    vcvtsd2usi {rd-sae}, %xmm0, %rax
124; CHECK-NEXT:    addq %rcx, %rax
125; CHECK-NEXT:    retq
126
127  %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
128  %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 11)
129  %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 9)
130  %res3 = add i64 %res, %res1
131  %res4 = add i64 %res3, %res2
132  ret i64 %res4
133}
134declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
135
; Rounding scalar double -> signed i64 conversion (vcvtsd2si) exercised with
; three values of the i32 rounding operand on the same input:
;   i32 4  -> plain instruction, no embedded-rounding modifier
;   i32 11 -> {rz-sae}
;   i32 9  -> {rd-sae}
; The three i64 results are chained through two adds so that every
; conversion contributes to the returned value.
136define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
137; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
138; CHECK:       ## %bb.0:
139; CHECK-NEXT:    vcvtsd2si %xmm0, %rax
140; CHECK-NEXT:    vcvtsd2si {rz-sae}, %xmm0, %rcx
141; CHECK-NEXT:    addq %rax, %rcx
142; CHECK-NEXT:    vcvtsd2si {rd-sae}, %xmm0, %rax
143; CHECK-NEXT:    addq %rcx, %rax
144; CHECK-NEXT:    retq
145
146  %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
147  %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 11)
148  %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 9)
149  %res3 = add i64 %res, %res1
150  %res4 = add i64 %res3, %res2
151  ret i64 %res4
152}
153declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
154
; Rounding scalar float -> unsigned i64 conversion (vcvtss2usi) exercised
; with three values of the i32 rounding operand on the same input:
;   i32 4  -> plain instruction, no embedded-rounding modifier
;   i32 11 -> {rz-sae}
;   i32 9  -> {rd-sae}
; The three i64 results are chained through two adds so that every
; conversion contributes to the returned value.
155define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
156; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
157; CHECK:       ## %bb.0:
158; CHECK-NEXT:    vcvtss2usi %xmm0, %rax
159; CHECK-NEXT:    vcvtss2usi {rz-sae}, %xmm0, %rcx
160; CHECK-NEXT:    addq %rax, %rcx
161; CHECK-NEXT:    vcvtss2usi {rd-sae}, %xmm0, %rax
162; CHECK-NEXT:    addq %rcx, %rax
163; CHECK-NEXT:    retq
164
165  %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
166  %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 11)
167  %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 9)
168  %res3 = add i64 %res, %res1
169  %res4 = add i64 %res3, %res2
170  ret i64 %res4
171}
172declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
173
; Rounding scalar float -> signed i64 conversion (vcvtss2si) exercised with
; three values of the i32 rounding operand on the same input:
;   i32 4  -> plain instruction, no embedded-rounding modifier
;   i32 11 -> {rz-sae}
;   i32 9  -> {rd-sae}
; The three i64 results are chained through two adds so that every
; conversion contributes to the returned value.
174define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
175; CHECK-LABEL: test_x86_avx512_cvtss2si64:
176; CHECK:       ## %bb.0:
177; CHECK-NEXT:    vcvtss2si %xmm0, %rax
178; CHECK-NEXT:    vcvtss2si {rz-sae}, %xmm0, %rcx
179; CHECK-NEXT:    addq %rax, %rcx
180; CHECK-NEXT:    vcvtss2si {rd-sae}, %xmm0, %rax
181; CHECK-NEXT:    addq %rcx, %rax
182; CHECK-NEXT:    retq
183
184  %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
185  %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 11)
186  %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 9)
187  %res3 = add i64 %res, %res1
188  %res4 = add i64 %res3, %res2
189  ret i64 %res4
190}
191declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
192
; Signed i64 -> scalar double conversion with an explicit rounding operand.
; The call passes i32 11 and the expected assembly is vcvtsi2sd with the
; {rz-sae} modifier, taking the integer from %rdi and merging into %xmm0.
193define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
194; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
195; CHECK:       ## %bb.0:
196; CHECK-NEXT:    vcvtsi2sd %rdi, {rz-sae}, %xmm0, %xmm0
197; CHECK-NEXT:    retq
198  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 11) ; <<2 x double>> [#uses=1]; i32 11 -> {rz-sae}
199  ret <2 x double> %res
200}
201declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
202
; Signed i64 -> scalar float conversion with an explicit rounding operand.
; The call passes i32 11 and the expected assembly is vcvtsi2ss with the
; {rz-sae} modifier, taking the integer from %rdi and merging into %xmm0.
203define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
204; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
205; CHECK:       ## %bb.0:
206; CHECK-NEXT:    vcvtsi2ss %rdi, {rz-sae}, %xmm0, %xmm0
207; CHECK-NEXT:    retq
208  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 11) ; <<4 x float>> [#uses=1]; i32 11 -> {rz-sae}
209  ret <4 x float> %res
210}
211declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
212
; Unsigned i64 -> scalar float conversion, explicit-rounding flavour. The
; call passes i32 9 and the expected assembly is vcvtusi2ss with the {rd-sae}
; modifier. The intrinsic used here is declared further down in this file,
; after the next function that shares it.
213define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) {
214; CHECK-LABEL: _mm_cvt_roundu64_ss:
215; CHECK:       ## %bb.0:
216; CHECK-NEXT:    vcvtusi2ss %rdi, {rd-sae}, %xmm0, %xmm0
217; CHECK-NEXT:    retq
218  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 9) ; <<4 x float>> [#uses=1]; i32 9 -> {rd-sae}
219  ret <4 x float> %res
220}
221
; Unsigned i64 -> scalar float conversion, default flavour. The call passes
; i32 4 and the expected assembly is a plain vcvtusi2ss with no
; embedded-rounding modifier.
222define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) {
223; CHECK-LABEL: _mm_cvtu64_ss:
224; CHECK:       ## %bb.0:
225; CHECK-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
226; CHECK-NEXT:    retq
227  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]; i32 4 -> no modifier
228  ret <4 x float> %res
229}
230declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
231
; Unsigned i64 -> scalar double conversion, default flavour. The call passes
; i32 4 and the expected assembly is a plain vcvtusi2sd with no
; embedded-rounding modifier. This mirrors the _mm_cvtu64_ss test earlier in
; the file, where the non-"round" name is likewise paired with i32 4.
232define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) {
233; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
234; CHECK:       ## %bb.0:
235; CHECK-NEXT:    vcvtusi2sd %rdi, %xmm0, %xmm0
236; CHECK-NEXT:    retq
237  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]; i32 4 -> no modifier
238  ret <2 x double> %res
239}
240
; Unsigned i64 -> scalar double conversion, explicit-rounding flavour. The
; call passes i32 9 and the expected assembly is vcvtusi2sd with the {rd-sae}
; modifier. This mirrors the _mm_cvt_roundu64_ss test earlier in the file,
; where the "round" name is likewise paired with i32 9.
241define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) {
242; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
243; CHECK:       ## %bb.0:
244; CHECK-NEXT:    vcvtusi2sd %rdi, {rd-sae}, %xmm0, %xmm0
245; CHECK-NEXT:    retq
246  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 9) ; <<2 x double>> [#uses=1]; i32 9 -> {rd-sae}
247  ret <2 x double> %res
248}
249declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
250