; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX

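; A note on the round{ps,pd} immediates checked below (per the Intel SDM
; ROUNDPS/ROUNDPD imm8 encoding): bits 1:0 select the rounding mode
; (00 = nearest, 01 = down, 10 = up, 11 = truncate), bit 2 selects the
; current MXCSR rounding mode instead, and bit 3 suppresses the inexact
; (precision) exception. Hence $9 = floor, $10 = ceil, $11 = trunc,
; $12 = nearbyint (MXCSR mode, inexact suppressed), and $4 = rint
; (MXCSR mode, inexact reported).
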
define <2 x double> @floor_v2f64(<2 x double> %p) {
; SSE41-LABEL: floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)

define <4 x float> @floor_v4f32(<4 x float> %p) {
; SSE41-LABEL: floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)

define <4 x double> @floor_v4f64(<4 x double> %p) {
; SSE41-LABEL: floor_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)

define <8 x float> @floor_v8f32(<8 x float> %p) {
; SSE41-LABEL: floor_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    roundps $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)

define <2 x double> @ceil_v2f64(<2 x double> %p) {
; SSE41-LABEL: ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)

define <4 x float> @ceil_v4f32(<4 x float> %p) {
; SSE41-LABEL: ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)

define <4 x double> @ceil_v4f64(<4 x double> %p) {
; SSE41-LABEL: ceil_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)

define <8 x float> @ceil_v8f32(<8 x float> %p) {
; SSE41-LABEL: ceil_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    roundps $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)

define <2 x double> @trunc_v2f64(<2 x double> %p) {
; SSE41-LABEL: trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)

define <4 x float> @trunc_v4f32(<4 x float> %p) {
; SSE41-LABEL: trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)

define <4 x double> @trunc_v4f64(<4 x double> %p) {
; SSE41-LABEL: trunc_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)

define <8 x float> @trunc_v8f32(<8 x float> %p) {
; SSE41-LABEL: trunc_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    roundps $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)

define <2 x double> @rint_v2f64(<2 x double> %p) {
; SSE41-LABEL: rint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)

define <4 x float> @rint_v4f32(<4 x float> %p) {
; SSE41-LABEL: rint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)

define <4 x double> @rint_v4f64(<4 x double> %p) {
; SSE41-LABEL: rint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)

define <8 x float> @rint_v8f32(<8 x float> %p) {
; SSE41-LABEL: rint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    roundps $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)

define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
; SSE41-LABEL: nearbyint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)

define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
; SSE41-LABEL: nearbyint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)

define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
; SSE41-LABEL: nearbyint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)

define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
; SSE41-LABEL: nearbyint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    roundps $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)

;
; Constant Folding
;
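; With constant operands the calls are folded at compile time, so the checks
; below match only a constant-pool load of the pre-rounded values.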

define <2 x double> @const_floor_v2f64() {
; SSE41-LABEL: const_floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_floor_v4f32() {
; SSE41-LABEL: const_floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_ceil_v2f64() {
; SSE41-LABEL: const_ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_ceil_v4f32() {
; SSE41-LABEL: const_ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_trunc_v2f64() {
; SSE41-LABEL: const_trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_trunc_v4f32() {
; SSE41-LABEL: const_trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}