1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s
3
4declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
5declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
6declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
7declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
8declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
9declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
10declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
11declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
12declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
13declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
14declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
15declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
16declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
17declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
18declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
19declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
20declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
21declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
22declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
23declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
24declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
25declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
26declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)
27declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)
28declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
29declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
30declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
31declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
32declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
33declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
34
35define <2 x double> @floor_v2f64(<2 x double> %p) {
36; CHECK-LABEL: floor_v2f64:
37; CHECK:       ## %bb.0:
38; CHECK-NEXT:    vroundpd $9, %xmm0, %xmm0
39; CHECK-NEXT:    retq
40  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
41  ret <2 x double> %t
42}
43
44define <4 x float> @floor_v4f32(<4 x float> %p) {
45; CHECK-LABEL: floor_v4f32:
46; CHECK:       ## %bb.0:
47; CHECK-NEXT:    vroundps $9, %xmm0, %xmm0
48; CHECK-NEXT:    retq
49  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
50  ret <4 x float> %t
51}
52
53define <4 x double> @floor_v4f64(<4 x double> %p){
54; CHECK-LABEL: floor_v4f64:
55; CHECK:       ## %bb.0:
56; CHECK-NEXT:    vroundpd $9, %ymm0, %ymm0
57; CHECK-NEXT:    retq
58  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
59  ret <4 x double> %t
60}
61
62define <8 x float> @floor_v8f32(<8 x float> %p) {
63; CHECK-LABEL: floor_v8f32:
64; CHECK:       ## %bb.0:
65; CHECK-NEXT:    vroundps $9, %ymm0, %ymm0
66; CHECK-NEXT:    retq
67  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
68  ret <8 x float> %t
69}
70
71define <8 x double> @floor_v8f64(<8 x double> %p){
72; CHECK-LABEL: floor_v8f64:
73; CHECK:       ## %bb.0:
74; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0
75; CHECK-NEXT:    retq
76  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
77  ret <8 x double> %t
78}
79
80define <16 x float> @floor_v16f32(<16 x float> %p) {
81; CHECK-LABEL: floor_v16f32:
82; CHECK:       ## %bb.0:
83; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0
84; CHECK-NEXT:    retq
85  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
86  ret <16 x float> %t
87}
88
89define <2 x double> @floor_v2f64_load(<2 x double>* %ptr) {
90; CHECK-LABEL: floor_v2f64_load:
91; CHECK:       ## %bb.0:
92; CHECK-NEXT:    vroundpd $9, (%rdi), %xmm0
93; CHECK-NEXT:    retq
94  %p = load <2 x double>, <2 x double>* %ptr
95  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
96  ret <2 x double> %t
97}
98
99define <4 x float> @floor_v4f32_load(<4 x float>* %ptr) {
100; CHECK-LABEL: floor_v4f32_load:
101; CHECK:       ## %bb.0:
102; CHECK-NEXT:    vroundps $9, (%rdi), %xmm0
103; CHECK-NEXT:    retq
104  %p = load <4 x float>, <4 x float>* %ptr
105  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
106  ret <4 x float> %t
107}
108
109define <4 x double> @floor_v4f64_load(<4 x double>* %ptr){
110; CHECK-LABEL: floor_v4f64_load:
111; CHECK:       ## %bb.0:
112; CHECK-NEXT:    vroundpd $9, (%rdi), %ymm0
113; CHECK-NEXT:    retq
114  %p = load <4 x double>, <4 x double>* %ptr
115  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
116  ret <4 x double> %t
117}
118
119define <8 x float> @floor_v8f32_load(<8 x float>* %ptr) {
120; CHECK-LABEL: floor_v8f32_load:
121; CHECK:       ## %bb.0:
122; CHECK-NEXT:    vroundps $9, (%rdi), %ymm0
123; CHECK-NEXT:    retq
124  %p = load <8 x float>, <8 x float>* %ptr
125  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
126  ret <8 x float> %t
127}
128
129define <8 x double> @floor_v8f64_load(<8 x double>* %ptr){
130; CHECK-LABEL: floor_v8f64_load:
131; CHECK:       ## %bb.0:
132; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0
133; CHECK-NEXT:    retq
134  %p = load <8 x double>, <8 x double>* %ptr
135  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
136  ret <8 x double> %t
137}
138
139define <16 x float> @floor_v16f32_load(<16 x float>* %ptr) {
140; CHECK-LABEL: floor_v16f32_load:
141; CHECK:       ## %bb.0:
142; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0
143; CHECK-NEXT:    retq
144  %p = load <16 x float>, <16 x float>* %ptr
145  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
146  ret <16 x float> %t
147}
148
149define <2 x double> @floor_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
150; CHECK-LABEL: floor_v2f64_mask:
151; CHECK:       ## %bb.0:
152; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
153; CHECK-NEXT:    vrndscalepd $9, %xmm0, %xmm1 {%k1}
154; CHECK-NEXT:    vmovapd %xmm1, %xmm0
155; CHECK-NEXT:    retq
156  %c = icmp eq <2 x i64> %cmp, zeroinitializer
157  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
158  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
159  ret <2 x double> %s
160}
161
162define <4 x float> @floor_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
163; CHECK-LABEL: floor_v4f32_mask:
164; CHECK:       ## %bb.0:
165; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
166; CHECK-NEXT:    vrndscaleps $9, %xmm0, %xmm1 {%k1}
167; CHECK-NEXT:    vmovaps %xmm1, %xmm0
168; CHECK-NEXT:    retq
169  %c = icmp eq <4 x i32> %cmp, zeroinitializer
170  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
171  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
172  ret <4 x float> %s
173}
174
175define <4 x double> @floor_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
176; CHECK-LABEL: floor_v4f64_mask:
177; CHECK:       ## %bb.0:
178; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
179; CHECK-NEXT:    vrndscalepd $9, %ymm0, %ymm1 {%k1}
180; CHECK-NEXT:    vmovapd %ymm1, %ymm0
181; CHECK-NEXT:    retq
182  %c = icmp eq <4 x i64> %cmp, zeroinitializer
183  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
184  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
185  ret <4 x double> %s
186}
187
188define <8 x float> @floor_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
189; CHECK-LABEL: floor_v8f32_mask:
190; CHECK:       ## %bb.0:
191; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
192; CHECK-NEXT:    vrndscaleps $9, %ymm0, %ymm1 {%k1}
193; CHECK-NEXT:    vmovaps %ymm1, %ymm0
194; CHECK-NEXT:    retq
195  %c = icmp eq <8 x i32> %cmp, zeroinitializer
196  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
197  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
198  ret <8 x float> %s
199}
200
201define <8 x double> @floor_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
202; CHECK-LABEL: floor_v8f64_mask:
203; CHECK:       ## %bb.0:
204; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
205; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm1 {%k1}
206; CHECK-NEXT:    vmovapd %zmm1, %zmm0
207; CHECK-NEXT:    retq
208  %c = icmp eq <8 x i64> %cmp, zeroinitializer
209  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
210  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
211  ret <8 x double> %s
212}
213
214define <16 x float> @floor_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
215; CHECK-LABEL: floor_v16f32_mask:
216; CHECK:       ## %bb.0:
217; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
218; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm1 {%k1}
219; CHECK-NEXT:    vmovaps %zmm1, %zmm0
220; CHECK-NEXT:    retq
221  %c = icmp eq <16 x i32> %cmp, zeroinitializer
222  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
223  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
224  ret <16 x float> %s
225}
226
227define <2 x double> @floor_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
228; CHECK-LABEL: floor_v2f64_maskz:
229; CHECK:       ## %bb.0:
230; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
231; CHECK-NEXT:    vrndscalepd $9, %xmm0, %xmm0 {%k1} {z}
232; CHECK-NEXT:    retq
233  %c = icmp eq <2 x i64> %cmp, zeroinitializer
234  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
235  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
236  ret <2 x double> %s
237}
238
239define <4 x float> @floor_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
240; CHECK-LABEL: floor_v4f32_maskz:
241; CHECK:       ## %bb.0:
242; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
243; CHECK-NEXT:    vrndscaleps $9, %xmm0, %xmm0 {%k1} {z}
244; CHECK-NEXT:    retq
245  %c = icmp eq <4 x i32> %cmp, zeroinitializer
246  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
247  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
248  ret <4 x float> %s
249}
250
251define <4 x double> @floor_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
252; CHECK-LABEL: floor_v4f64_maskz:
253; CHECK:       ## %bb.0:
254; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
255; CHECK-NEXT:    vrndscalepd $9, %ymm0, %ymm0 {%k1} {z}
256; CHECK-NEXT:    retq
257  %c = icmp eq <4 x i64> %cmp, zeroinitializer
258  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
259  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
260  ret <4 x double> %s
261}
262
263define <8 x float> @floor_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
264; CHECK-LABEL: floor_v8f32_maskz:
265; CHECK:       ## %bb.0:
266; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
267; CHECK-NEXT:    vrndscaleps $9, %ymm0, %ymm0 {%k1} {z}
268; CHECK-NEXT:    retq
269  %c = icmp eq <8 x i32> %cmp, zeroinitializer
270  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
271  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
272  ret <8 x float> %s
273}
274
275define <8 x double> @floor_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
276; CHECK-LABEL: floor_v8f64_maskz:
277; CHECK:       ## %bb.0:
278; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
279; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0 {%k1} {z}
280; CHECK-NEXT:    retq
281  %c = icmp eq <8 x i64> %cmp, zeroinitializer
282  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
283  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
284  ret <8 x double> %s
285}
286
287define <16 x float> @floor_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
288; CHECK-LABEL: floor_v16f32_maskz:
289; CHECK:       ## %bb.0:
290; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
291; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0 {%k1} {z}
292; CHECK-NEXT:    retq
293  %c = icmp eq <16 x i32> %cmp, zeroinitializer
294  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
295  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
296  ret <16 x float> %s
297}
298
299define <2 x double> @floor_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
300; CHECK-LABEL: floor_v2f64_mask_load:
301; CHECK:       ## %bb.0:
302; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
303; CHECK-NEXT:    vrndscalepd $9, (%rdi), %xmm0 {%k1}
304; CHECK-NEXT:    retq
305  %c = icmp eq <2 x i64> %cmp, zeroinitializer
306  %p = load <2 x double>, <2 x double>* %ptr
307  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
308  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
309  ret <2 x double> %s
310}
311
312define <4 x float> @floor_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
313; CHECK-LABEL: floor_v4f32_mask_load:
314; CHECK:       ## %bb.0:
315; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
316; CHECK-NEXT:    vrndscaleps $9, (%rdi), %xmm0 {%k1}
317; CHECK-NEXT:    retq
318  %c = icmp eq <4 x i32> %cmp, zeroinitializer
319  %p = load <4 x float>, <4 x float>* %ptr
320  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
321  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
322  ret <4 x float> %s
323}
324
325define <4 x double> @floor_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
326; CHECK-LABEL: floor_v4f64_mask_load:
327; CHECK:       ## %bb.0:
328; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
329; CHECK-NEXT:    vrndscalepd $9, (%rdi), %ymm0 {%k1}
330; CHECK-NEXT:    retq
331  %c = icmp eq <4 x i64> %cmp, zeroinitializer
332  %p = load <4 x double>, <4 x double>* %ptr
333  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
334  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
335  ret <4 x double> %s
336}
337
338define <8 x float> @floor_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
339; CHECK-LABEL: floor_v8f32_mask_load:
340; CHECK:       ## %bb.0:
341; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
342; CHECK-NEXT:    vrndscaleps $9, (%rdi), %ymm0 {%k1}
343; CHECK-NEXT:    retq
344  %c = icmp eq <8 x i32> %cmp, zeroinitializer
345  %p = load <8 x float>, <8 x float>* %ptr
346  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
347  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
348  ret <8 x float> %s
349}
350
351define <8 x double> @floor_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
352; CHECK-LABEL: floor_v8f64_mask_load:
353; CHECK:       ## %bb.0:
354; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
355; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0 {%k1}
356; CHECK-NEXT:    retq
357  %c = icmp eq <8 x i64> %cmp, zeroinitializer
358  %p = load <8 x double>, <8 x double>* %ptr
359  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
360  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
361  ret <8 x double> %s
362}
363
364define <16 x float> @floor_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
365; CHECK-LABEL: floor_v16f32_mask_load:
366; CHECK:       ## %bb.0:
367; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
368; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0 {%k1}
369; CHECK-NEXT:    retq
370  %c = icmp eq <16 x i32> %cmp, zeroinitializer
371  %p = load <16 x float>, <16 x float>* %ptr
372  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
373  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
374  ret <16 x float> %s
375}
376
377define <2 x double> @floor_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
378; CHECK-LABEL: floor_v2f64_maskz_load:
379; CHECK:       ## %bb.0:
380; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
381; CHECK-NEXT:    vrndscalepd $9, (%rdi), %xmm0 {%k1} {z}
382; CHECK-NEXT:    retq
383  %c = icmp eq <2 x i64> %cmp, zeroinitializer
384  %p = load <2 x double>, <2 x double>* %ptr
385  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
386  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
387  ret <2 x double> %s
388}
389
390define <4 x float> @floor_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
391; CHECK-LABEL: floor_v4f32_maskz_load:
392; CHECK:       ## %bb.0:
393; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
394; CHECK-NEXT:    vrndscaleps $9, (%rdi), %xmm0 {%k1} {z}
395; CHECK-NEXT:    retq
396  %c = icmp eq <4 x i32> %cmp, zeroinitializer
397  %p = load <4 x float>, <4 x float>* %ptr
398  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
399  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
400  ret <4 x float> %s
401}
402
403define <4 x double> @floor_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
404; CHECK-LABEL: floor_v4f64_maskz_load:
405; CHECK:       ## %bb.0:
406; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
407; CHECK-NEXT:    vrndscalepd $9, (%rdi), %ymm0 {%k1} {z}
408; CHECK-NEXT:    retq
409  %c = icmp eq <4 x i64> %cmp, zeroinitializer
410  %p = load <4 x double>, <4 x double>* %ptr
411  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
412  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
413  ret <4 x double> %s
414}
415
416define <8 x float> @floor_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
417; CHECK-LABEL: floor_v8f32_maskz_load:
418; CHECK:       ## %bb.0:
419; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
420; CHECK-NEXT:    vrndscaleps $9, (%rdi), %ymm0 {%k1} {z}
421; CHECK-NEXT:    retq
422  %c = icmp eq <8 x i32> %cmp, zeroinitializer
423  %p = load <8 x float>, <8 x float>* %ptr
424  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
425  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
426  ret <8 x float> %s
427}
428
429define <8 x double> @floor_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
430; CHECK-LABEL: floor_v8f64_maskz_load:
431; CHECK:       ## %bb.0:
432; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
433; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0 {%k1} {z}
434; CHECK-NEXT:    retq
435  %c = icmp eq <8 x i64> %cmp, zeroinitializer
436  %p = load <8 x double>, <8 x double>* %ptr
437  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
438  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
439  ret <8 x double> %s
440}
441
442define <16 x float> @floor_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
443; CHECK-LABEL: floor_v16f32_maskz_load:
444; CHECK:       ## %bb.0:
445; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
446; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0 {%k1} {z}
447; CHECK-NEXT:    retq
448  %c = icmp eq <16 x i32> %cmp, zeroinitializer
449  %p = load <16 x float>, <16 x float>* %ptr
450  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
451  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
452  ret <16 x float> %s
453}
454
455define <2 x double> @floor_v2f64_broadcast(double* %ptr) {
456; CHECK-LABEL: floor_v2f64_broadcast:
457; CHECK:       ## %bb.0:
458; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0
459; CHECK-NEXT:    retq
460  %ps = load double, double* %ptr
461  %pins = insertelement <2 x double> undef, double %ps, i32 0
462  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
463  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
464  ret <2 x double> %t
465}
466
467define <4 x float> @floor_v4f32_broadcast(float* %ptr) {
468; CHECK-LABEL: floor_v4f32_broadcast:
469; CHECK:       ## %bb.0:
470; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0
471; CHECK-NEXT:    retq
472  %ps = load float, float* %ptr
473  %pins = insertelement <4 x float> undef, float %ps, i32 0
474  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
475  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
476  ret <4 x float> %t
477}
478
479define <4 x double> @floor_v4f64_broadcast(double* %ptr){
480; CHECK-LABEL: floor_v4f64_broadcast:
481; CHECK:       ## %bb.0:
482; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0
483; CHECK-NEXT:    retq
484  %ps = load double, double* %ptr
485  %pins = insertelement <4 x double> undef, double %ps, i32 0
486  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
487  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
488  ret <4 x double> %t
489}
490
491define <8 x float> @floor_v8f32_broadcast(float* %ptr) {
492; CHECK-LABEL: floor_v8f32_broadcast:
493; CHECK:       ## %bb.0:
494; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0
495; CHECK-NEXT:    retq
496  %ps = load float, float* %ptr
497  %pins = insertelement <8 x float> undef, float %ps, i32 0
498  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
499  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
500  ret <8 x float> %t
501}
502
503define <8 x double> @floor_v8f64_broadcast(double* %ptr){
504; CHECK-LABEL: floor_v8f64_broadcast:
505; CHECK:       ## %bb.0:
506; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0
507; CHECK-NEXT:    retq
508  %ps = load double, double* %ptr
509  %pins = insertelement <8 x double> undef, double %ps, i32 0
510  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
511  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
512  ret <8 x double> %t
513}
514
515define <16 x float> @floor_v16f32_broadcast(float* %ptr) {
516; CHECK-LABEL: floor_v16f32_broadcast:
517; CHECK:       ## %bb.0:
518; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0
519; CHECK-NEXT:    retq
520  %ps = load float, float* %ptr
521  %pins = insertelement <16 x float> undef, float %ps, i32 0
522  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
523  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
524  ret <16 x float> %t
525}
526
527define <2 x double> @floor_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
528; CHECK-LABEL: floor_v2f64_mask_broadcast:
529; CHECK:       ## %bb.0:
530; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
531; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1}
532; CHECK-NEXT:    retq
533  %c = icmp eq <2 x i64> %cmp, zeroinitializer
534  %ps = load double, double* %ptr
535  %pins = insertelement <2 x double> undef, double %ps, i32 0
536  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
537  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
538  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
539  ret <2 x double> %s
540}
541
542define <4 x float> @floor_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
543; CHECK-LABEL: floor_v4f32_mask_broadcast:
544; CHECK:       ## %bb.0:
545; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
546; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1}
547; CHECK-NEXT:    retq
548  %c = icmp eq <4 x i32> %cmp, zeroinitializer
549  %ps = load float, float* %ptr
550  %pins = insertelement <4 x float> undef, float %ps, i32 0
551  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
552  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
553  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
554  ret <4 x float> %s
555}
556
557define <4 x double> @floor_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
558; CHECK-LABEL: floor_v4f64_mask_broadcast:
559; CHECK:       ## %bb.0:
560; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
561; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1}
562; CHECK-NEXT:    retq
563  %c = icmp eq <4 x i64> %cmp, zeroinitializer
564  %ps = load double, double* %ptr
565  %pins = insertelement <4 x double> undef, double %ps, i32 0
566  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
567  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
568  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
569  ret <4 x double> %s
570}
571
572define <8 x float> @floor_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
573; CHECK-LABEL: floor_v8f32_mask_broadcast:
574; CHECK:       ## %bb.0:
575; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
576; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1}
577; CHECK-NEXT:    retq
578  %c = icmp eq <8 x i32> %cmp, zeroinitializer
579  %ps = load float, float* %ptr
580  %pins = insertelement <8 x float> undef, float %ps, i32 0
581  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
582  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
583  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
584  ret <8 x float> %s
585}
586
587define <8 x double> @floor_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
588; CHECK-LABEL: floor_v8f64_mask_broadcast:
589; CHECK:       ## %bb.0:
590; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
591; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1}
592; CHECK-NEXT:    retq
593  %c = icmp eq <8 x i64> %cmp, zeroinitializer
594  %ps = load double, double* %ptr
595  %pins = insertelement <8 x double> undef, double %ps, i32 0
596  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
597  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
598  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
599  ret <8 x double> %s
600}
601
602define <16 x float> @floor_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
603; CHECK-LABEL: floor_v16f32_mask_broadcast:
604; CHECK:       ## %bb.0:
605; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
606; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1}
607; CHECK-NEXT:    retq
608  %c = icmp eq <16 x i32> %cmp, zeroinitializer
609  %ps = load float, float* %ptr
610  %pins = insertelement <16 x float> undef, float %ps, i32 0
611  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
612  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
613  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
614  ret <16 x float> %s
615}
616
617define <2 x double> @floor_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
618; CHECK-LABEL: floor_v2f64_maskz_broadcast:
619; CHECK:       ## %bb.0:
620; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
621; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1} {z}
622; CHECK-NEXT:    retq
623  %c = icmp eq <2 x i64> %cmp, zeroinitializer
624  %ps = load double, double* %ptr
625  %pins = insertelement <2 x double> undef, double %ps, i32 0
626  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
627  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
628  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
629  ret <2 x double> %s
630}
631
632define <4 x float> @floor_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
633; CHECK-LABEL: floor_v4f32_maskz_broadcast:
634; CHECK:       ## %bb.0:
635; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
636; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1} {z}
637; CHECK-NEXT:    retq
638  %c = icmp eq <4 x i32> %cmp, zeroinitializer
639  %ps = load float, float* %ptr
640  %pins = insertelement <4 x float> undef, float %ps, i32 0
641  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
642  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
643  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
644  ret <4 x float> %s
645}
646
647define <4 x double> @floor_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
648; CHECK-LABEL: floor_v4f64_maskz_broadcast:
649; CHECK:       ## %bb.0:
650; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
651; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1} {z}
652; CHECK-NEXT:    retq
653  %c = icmp eq <4 x i64> %cmp, zeroinitializer
654  %ps = load double, double* %ptr
655  %pins = insertelement <4 x double> undef, double %ps, i32 0
656  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
657  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
658  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
659  ret <4 x double> %s
660}
661
662define <8 x float> @floor_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
663; CHECK-LABEL: floor_v8f32_maskz_broadcast:
664; CHECK:       ## %bb.0:
665; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
666; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1} {z}
667; CHECK-NEXT:    retq
668  %c = icmp eq <8 x i32> %cmp, zeroinitializer
669  %ps = load float, float* %ptr
670  %pins = insertelement <8 x float> undef, float %ps, i32 0
671  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
672  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
673  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
674  ret <8 x float> %s
675}
676
677define <8 x double> @floor_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
678; CHECK-LABEL: floor_v8f64_maskz_broadcast:
679; CHECK:       ## %bb.0:
680; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
681; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1} {z}
682; CHECK-NEXT:    retq
683  %c = icmp eq <8 x i64> %cmp, zeroinitializer
684  %ps = load double, double* %ptr
685  %pins = insertelement <8 x double> undef, double %ps, i32 0
686  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
687  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
688  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
689  ret <8 x double> %s
690}
691
692define <16 x float> @floor_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
693; CHECK-LABEL: floor_v16f32_maskz_broadcast:
694; CHECK:       ## %bb.0:
695; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
696; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1} {z}
697; CHECK-NEXT:    retq
698  %c = icmp eq <16 x i32> %cmp, zeroinitializer
699  %ps = load float, float* %ptr
700  %pins = insertelement <16 x float> undef, float %ps, i32 0
701  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
702  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
703  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
704  ret <16 x float> %s
705}
706
707define <2 x double> @ceil_v2f64(<2 x double> %p) {
708; CHECK-LABEL: ceil_v2f64:
709; CHECK:       ## %bb.0:
710; CHECK-NEXT:    vroundpd $10, %xmm0, %xmm0
711; CHECK-NEXT:    retq
712  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
713  ret <2 x double> %t
714}
715
716define <4 x float> @ceil_v4f32(<4 x float> %p) {
717; CHECK-LABEL: ceil_v4f32:
718; CHECK:       ## %bb.0:
719; CHECK-NEXT:    vroundps $10, %xmm0, %xmm0
720; CHECK-NEXT:    retq
721  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
722  ret <4 x float> %t
723}
724
725define <4 x double> @ceil_v4f64(<4 x double> %p){
726; CHECK-LABEL: ceil_v4f64:
727; CHECK:       ## %bb.0:
728; CHECK-NEXT:    vroundpd $10, %ymm0, %ymm0
729; CHECK-NEXT:    retq
730  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
731  ret <4 x double> %t
732}
733
734define <8 x float> @ceil_v8f32(<8 x float> %p) {
735; CHECK-LABEL: ceil_v8f32:
736; CHECK:       ## %bb.0:
737; CHECK-NEXT:    vroundps $10, %ymm0, %ymm0
738; CHECK-NEXT:    retq
739  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
740  ret <8 x float> %t
741}
742
743define <8 x double> @ceil_v8f64(<8 x double> %p){
744; CHECK-LABEL: ceil_v8f64:
745; CHECK:       ## %bb.0:
746; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0
747; CHECK-NEXT:    retq
748  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
749  ret <8 x double> %t
750}
751
752define <16 x float> @ceil_v16f32(<16 x float> %p) {
753; CHECK-LABEL: ceil_v16f32:
754; CHECK:       ## %bb.0:
755; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0
756; CHECK-NEXT:    retq
757  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
758  ret <16 x float> %t
759}
760
761define <2 x double> @ceil_v2f64_load(<2 x double>* %ptr) {
762; CHECK-LABEL: ceil_v2f64_load:
763; CHECK:       ## %bb.0:
764; CHECK-NEXT:    vroundpd $10, (%rdi), %xmm0
765; CHECK-NEXT:    retq
766  %p = load <2 x double>, <2 x double>* %ptr
767  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
768  ret <2 x double> %t
769}
770
771define <4 x float> @ceil_v4f32_load(<4 x float>* %ptr) {
772; CHECK-LABEL: ceil_v4f32_load:
773; CHECK:       ## %bb.0:
774; CHECK-NEXT:    vroundps $10, (%rdi), %xmm0
775; CHECK-NEXT:    retq
776  %p = load <4 x float>, <4 x float>* %ptr
777  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
778  ret <4 x float> %t
779}
780
781define <4 x double> @ceil_v4f64_load(<4 x double>* %ptr){
782; CHECK-LABEL: ceil_v4f64_load:
783; CHECK:       ## %bb.0:
784; CHECK-NEXT:    vroundpd $10, (%rdi), %ymm0
785; CHECK-NEXT:    retq
786  %p = load <4 x double>, <4 x double>* %ptr
787  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
788  ret <4 x double> %t
789}
790
791define <8 x float> @ceil_v8f32_load(<8 x float>* %ptr) {
792; CHECK-LABEL: ceil_v8f32_load:
793; CHECK:       ## %bb.0:
794; CHECK-NEXT:    vroundps $10, (%rdi), %ymm0
795; CHECK-NEXT:    retq
796  %p = load <8 x float>, <8 x float>* %ptr
797  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
798  ret <8 x float> %t
799}
800
801define <8 x double> @ceil_v8f64_load(<8 x double>* %ptr){
802; CHECK-LABEL: ceil_v8f64_load:
803; CHECK:       ## %bb.0:
804; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0
805; CHECK-NEXT:    retq
806  %p = load <8 x double>, <8 x double>* %ptr
807  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
808  ret <8 x double> %t
809}
810
811define <16 x float> @ceil_v16f32_load(<16 x float>* %ptr) {
812; CHECK-LABEL: ceil_v16f32_load:
813; CHECK:       ## %bb.0:
814; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0
815; CHECK-NEXT:    retq
816  %p = load <16 x float>, <16 x float>* %ptr
817  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
818  ret <16 x float> %t
819}
820
821define <2 x double> @ceil_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
822; CHECK-LABEL: ceil_v2f64_mask:
823; CHECK:       ## %bb.0:
824; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
825; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm1 {%k1}
826; CHECK-NEXT:    vmovapd %xmm1, %xmm0
827; CHECK-NEXT:    retq
828  %c = icmp eq <2 x i64> %cmp, zeroinitializer
829  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
830  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
831  ret <2 x double> %s
832}
833
834define <4 x float> @ceil_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
835; CHECK-LABEL: ceil_v4f32_mask:
836; CHECK:       ## %bb.0:
837; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
838; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm1 {%k1}
839; CHECK-NEXT:    vmovaps %xmm1, %xmm0
840; CHECK-NEXT:    retq
841  %c = icmp eq <4 x i32> %cmp, zeroinitializer
842  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
843  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
844  ret <4 x float> %s
845}
846
847define <4 x double> @ceil_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
848; CHECK-LABEL: ceil_v4f64_mask:
849; CHECK:       ## %bb.0:
850; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
851; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm1 {%k1}
852; CHECK-NEXT:    vmovapd %ymm1, %ymm0
853; CHECK-NEXT:    retq
854  %c = icmp eq <4 x i64> %cmp, zeroinitializer
855  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
856  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
857  ret <4 x double> %s
858}
859
860define <8 x float> @ceil_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
861; CHECK-LABEL: ceil_v8f32_mask:
862; CHECK:       ## %bb.0:
863; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
864; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm1 {%k1}
865; CHECK-NEXT:    vmovaps %ymm1, %ymm0
866; CHECK-NEXT:    retq
867  %c = icmp eq <8 x i32> %cmp, zeroinitializer
868  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
869  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
870  ret <8 x float> %s
871}
872
873define <8 x double> @ceil_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
874; CHECK-LABEL: ceil_v8f64_mask:
875; CHECK:       ## %bb.0:
876; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
877; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm1 {%k1}
878; CHECK-NEXT:    vmovapd %zmm1, %zmm0
879; CHECK-NEXT:    retq
880  %c = icmp eq <8 x i64> %cmp, zeroinitializer
881  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
882  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
883  ret <8 x double> %s
884}
885
886define <16 x float> @ceil_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
887; CHECK-LABEL: ceil_v16f32_mask:
888; CHECK:       ## %bb.0:
889; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
890; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm1 {%k1}
891; CHECK-NEXT:    vmovaps %zmm1, %zmm0
892; CHECK-NEXT:    retq
893  %c = icmp eq <16 x i32> %cmp, zeroinitializer
894  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
895  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
896  ret <16 x float> %s
897}
898
899define <2 x double> @ceil_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
900; CHECK-LABEL: ceil_v2f64_maskz:
901; CHECK:       ## %bb.0:
902; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
903; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm0 {%k1} {z}
904; CHECK-NEXT:    retq
905  %c = icmp eq <2 x i64> %cmp, zeroinitializer
906  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
907  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
908  ret <2 x double> %s
909}
910
911define <4 x float> @ceil_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
912; CHECK-LABEL: ceil_v4f32_maskz:
913; CHECK:       ## %bb.0:
914; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
915; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm0 {%k1} {z}
916; CHECK-NEXT:    retq
917  %c = icmp eq <4 x i32> %cmp, zeroinitializer
918  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
919  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
920  ret <4 x float> %s
921}
922
923define <4 x double> @ceil_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
924; CHECK-LABEL: ceil_v4f64_maskz:
925; CHECK:       ## %bb.0:
926; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
927; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm0 {%k1} {z}
928; CHECK-NEXT:    retq
929  %c = icmp eq <4 x i64> %cmp, zeroinitializer
930  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
931  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
932  ret <4 x double> %s
933}
934
935define <8 x float> @ceil_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
936; CHECK-LABEL: ceil_v8f32_maskz:
937; CHECK:       ## %bb.0:
938; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
939; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm0 {%k1} {z}
940; CHECK-NEXT:    retq
941  %c = icmp eq <8 x i32> %cmp, zeroinitializer
942  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
943  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
944  ret <8 x float> %s
945}
946
947define <8 x double> @ceil_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
948; CHECK-LABEL: ceil_v8f64_maskz:
949; CHECK:       ## %bb.0:
950; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
951; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0 {%k1} {z}
952; CHECK-NEXT:    retq
953  %c = icmp eq <8 x i64> %cmp, zeroinitializer
954  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
955  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
956  ret <8 x double> %s
957}
958
959define <16 x float> @ceil_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
960; CHECK-LABEL: ceil_v16f32_maskz:
961; CHECK:       ## %bb.0:
962; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
963; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0 {%k1} {z}
964; CHECK-NEXT:    retq
965  %c = icmp eq <16 x i32> %cmp, zeroinitializer
966  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
967  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
968  ret <16 x float> %s
969}
970
971define <2 x double> @ceil_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
972; CHECK-LABEL: ceil_v2f64_mask_load:
973; CHECK:       ## %bb.0:
974; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
975; CHECK-NEXT:    vrndscalepd $10, (%rdi), %xmm0 {%k1}
976; CHECK-NEXT:    retq
977  %c = icmp eq <2 x i64> %cmp, zeroinitializer
978  %p = load <2 x double>, <2 x double>* %ptr
979  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
980  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
981  ret <2 x double> %s
982}
983
984define <4 x float> @ceil_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
985; CHECK-LABEL: ceil_v4f32_mask_load:
986; CHECK:       ## %bb.0:
987; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
988; CHECK-NEXT:    vrndscaleps $10, (%rdi), %xmm0 {%k1}
989; CHECK-NEXT:    retq
990  %c = icmp eq <4 x i32> %cmp, zeroinitializer
991  %p = load <4 x float>, <4 x float>* %ptr
992  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
993  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
994  ret <4 x float> %s
995}
996
997define <4 x double> @ceil_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
998; CHECK-LABEL: ceil_v4f64_mask_load:
999; CHECK:       ## %bb.0:
1000; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
1001; CHECK-NEXT:    vrndscalepd $10, (%rdi), %ymm0 {%k1}
1002; CHECK-NEXT:    retq
1003  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1004  %p = load <4 x double>, <4 x double>* %ptr
1005  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1006  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
1007  ret <4 x double> %s
1008}
1009
1010define <8 x float> @ceil_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1011; CHECK-LABEL: ceil_v8f32_mask_load:
1012; CHECK:       ## %bb.0:
1013; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
1014; CHECK-NEXT:    vrndscaleps $10, (%rdi), %ymm0 {%k1}
1015; CHECK-NEXT:    retq
1016  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1017  %p = load <8 x float>, <8 x float>* %ptr
1018  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1019  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
1020  ret <8 x float> %s
1021}
1022
1023define <8 x double> @ceil_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1024; CHECK-LABEL: ceil_v8f64_mask_load:
1025; CHECK:       ## %bb.0:
1026; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1027; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0 {%k1}
1028; CHECK-NEXT:    retq
1029  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1030  %p = load <8 x double>, <8 x double>* %ptr
1031  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1032  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
1033  ret <8 x double> %s
1034}
1035
1036define <16 x float> @ceil_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1037; CHECK-LABEL: ceil_v16f32_mask_load:
1038; CHECK:       ## %bb.0:
1039; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1040; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0 {%k1}
1041; CHECK-NEXT:    retq
1042  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1043  %p = load <16 x float>, <16 x float>* %ptr
1044  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1045  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
1046  ret <16 x float> %s
1047}
1048
1049define <2 x double> @ceil_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
1050; CHECK-LABEL: ceil_v2f64_maskz_load:
1051; CHECK:       ## %bb.0:
1052; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
1053; CHECK-NEXT:    vrndscalepd $10, (%rdi), %xmm0 {%k1} {z}
1054; CHECK-NEXT:    retq
1055  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1056  %p = load <2 x double>, <2 x double>* %ptr
1057  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1058  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
1059  ret <2 x double> %s
1060}
1061
1062define <4 x float> @ceil_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
1063; CHECK-LABEL: ceil_v4f32_maskz_load:
1064; CHECK:       ## %bb.0:
1065; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
1066; CHECK-NEXT:    vrndscaleps $10, (%rdi), %xmm0 {%k1} {z}
1067; CHECK-NEXT:    retq
1068  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1069  %p = load <4 x float>, <4 x float>* %ptr
1070  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1071  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
1072  ret <4 x float> %s
1073}
1074
1075define <4 x double> @ceil_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
1076; CHECK-LABEL: ceil_v4f64_maskz_load:
1077; CHECK:       ## %bb.0:
1078; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
1079; CHECK-NEXT:    vrndscalepd $10, (%rdi), %ymm0 {%k1} {z}
1080; CHECK-NEXT:    retq
1081  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1082  %p = load <4 x double>, <4 x double>* %ptr
1083  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1084  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
1085  ret <4 x double> %s
1086}
1087
1088define <8 x float> @ceil_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
1089; CHECK-LABEL: ceil_v8f32_maskz_load:
1090; CHECK:       ## %bb.0:
1091; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
1092; CHECK-NEXT:    vrndscaleps $10, (%rdi), %ymm0 {%k1} {z}
1093; CHECK-NEXT:    retq
1094  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1095  %p = load <8 x float>, <8 x float>* %ptr
1096  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1097  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
1098  ret <8 x float> %s
1099}
1100
1101define <8 x double> @ceil_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
1102; CHECK-LABEL: ceil_v8f64_maskz_load:
1103; CHECK:       ## %bb.0:
1104; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
1105; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0 {%k1} {z}
1106; CHECK-NEXT:    retq
1107  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1108  %p = load <8 x double>, <8 x double>* %ptr
1109  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1110  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
1111  ret <8 x double> %s
1112}
1113
1114define <16 x float> @ceil_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
1115; CHECK-LABEL: ceil_v16f32_maskz_load:
1116; CHECK:       ## %bb.0:
1117; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1118; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0 {%k1} {z}
1119; CHECK-NEXT:    retq
1120  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1121  %p = load <16 x float>, <16 x float>* %ptr
1122  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1123  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
1124  ret <16 x float> %s
1125}
1126
1127define <2 x double> @ceil_v2f64_broadcast(double* %ptr) {
1128; CHECK-LABEL: ceil_v2f64_broadcast:
1129; CHECK:       ## %bb.0:
1130; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0
1131; CHECK-NEXT:    retq
1132  %ps = load double, double* %ptr
1133  %pins = insertelement <2 x double> undef, double %ps, i32 0
1134  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1135  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1136  ret <2 x double> %t
1137}
1138
1139define <4 x float> @ceil_v4f32_broadcast(float* %ptr) {
1140; CHECK-LABEL: ceil_v4f32_broadcast:
1141; CHECK:       ## %bb.0:
1142; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0
1143; CHECK-NEXT:    retq
1144  %ps = load float, float* %ptr
1145  %pins = insertelement <4 x float> undef, float %ps, i32 0
1146  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1147  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1148  ret <4 x float> %t
1149}
1150
1151define <4 x double> @ceil_v4f64_broadcast(double* %ptr){
1152; CHECK-LABEL: ceil_v4f64_broadcast:
1153; CHECK:       ## %bb.0:
1154; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0
1155; CHECK-NEXT:    retq
1156  %ps = load double, double* %ptr
1157  %pins = insertelement <4 x double> undef, double %ps, i32 0
1158  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1159  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1160  ret <4 x double> %t
1161}
1162
1163define <8 x float> @ceil_v8f32_broadcast(float* %ptr) {
1164; CHECK-LABEL: ceil_v8f32_broadcast:
1165; CHECK:       ## %bb.0:
1166; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0
1167; CHECK-NEXT:    retq
1168  %ps = load float, float* %ptr
1169  %pins = insertelement <8 x float> undef, float %ps, i32 0
1170  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1171  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1172  ret <8 x float> %t
1173}
1174
1175define <8 x double> @ceil_v8f64_broadcast(double* %ptr){
1176; CHECK-LABEL: ceil_v8f64_broadcast:
1177; CHECK:       ## %bb.0:
1178; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0
1179; CHECK-NEXT:    retq
1180  %ps = load double, double* %ptr
1181  %pins = insertelement <8 x double> undef, double %ps, i32 0
1182  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1183  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1184  ret <8 x double> %t
1185}
1186
1187define <16 x float> @ceil_v16f32_broadcast(float* %ptr) {
1188; CHECK-LABEL: ceil_v16f32_broadcast:
1189; CHECK:       ## %bb.0:
1190; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0
1191; CHECK-NEXT:    retq
1192  %ps = load float, float* %ptr
1193  %pins = insertelement <16 x float> undef, float %ps, i32 0
1194  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1195  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1196  ret <16 x float> %t
1197}
1198
1199define <2 x double> @ceil_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
1200; CHECK-LABEL: ceil_v2f64_mask_broadcast:
1201; CHECK:       ## %bb.0:
1202; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
1203; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1}
1204; CHECK-NEXT:    retq
1205  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1206  %ps = load double, double* %ptr
1207  %pins = insertelement <2 x double> undef, double %ps, i32 0
1208  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1209  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1210  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
1211  ret <2 x double> %s
1212}
1213
1214define <4 x float> @ceil_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
1215; CHECK-LABEL: ceil_v4f32_mask_broadcast:
1216; CHECK:       ## %bb.0:
1217; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
1218; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1}
1219; CHECK-NEXT:    retq
1220  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1221  %ps = load float, float* %ptr
1222  %pins = insertelement <4 x float> undef, float %ps, i32 0
1223  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1224  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1225  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
1226  ret <4 x float> %s
1227}
1228
1229define <4 x double> @ceil_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
1230; CHECK-LABEL: ceil_v4f64_mask_broadcast:
1231; CHECK:       ## %bb.0:
1232; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
1233; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1}
1234; CHECK-NEXT:    retq
1235  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1236  %ps = load double, double* %ptr
1237  %pins = insertelement <4 x double> undef, double %ps, i32 0
1238  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1239  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1240  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
1241  ret <4 x double> %s
1242}
1243
1244define <8 x float> @ceil_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1245; CHECK-LABEL: ceil_v8f32_mask_broadcast:
1246; CHECK:       ## %bb.0:
1247; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
1248; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1}
1249; CHECK-NEXT:    retq
1250  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1251  %ps = load float, float* %ptr
1252  %pins = insertelement <8 x float> undef, float %ps, i32 0
1253  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1254  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1255  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
1256  ret <8 x float> %s
1257}
1258
1259define <8 x double> @ceil_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1260; CHECK-LABEL: ceil_v8f64_mask_broadcast:
1261; CHECK:       ## %bb.0:
1262; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1263; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1}
1264; CHECK-NEXT:    retq
1265  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1266  %ps = load double, double* %ptr
1267  %pins = insertelement <8 x double> undef, double %ps, i32 0
1268  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1269  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1270  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
1271  ret <8 x double> %s
1272}
1273
1274define <16 x float> @ceil_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1275; CHECK-LABEL: ceil_v16f32_mask_broadcast:
1276; CHECK:       ## %bb.0:
1277; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1278; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1}
1279; CHECK-NEXT:    retq
1280  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1281  %ps = load float, float* %ptr
1282  %pins = insertelement <16 x float> undef, float %ps, i32 0
1283  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1284  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1285  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
1286  ret <16 x float> %s
1287}
1288
1289define <2 x double> @ceil_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
1290; CHECK-LABEL: ceil_v2f64_maskz_broadcast:
1291; CHECK:       ## %bb.0:
1292; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
1293; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1} {z}
1294; CHECK-NEXT:    retq
1295  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1296  %ps = load double, double* %ptr
1297  %pins = insertelement <2 x double> undef, double %ps, i32 0
1298  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1299  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
1300  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
1301  ret <2 x double> %s
1302}
1303
1304define <4 x float> @ceil_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
1305; CHECK-LABEL: ceil_v4f32_maskz_broadcast:
1306; CHECK:       ## %bb.0:
1307; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
1308; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1} {z}
1309; CHECK-NEXT:    retq
1310  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1311  %ps = load float, float* %ptr
1312  %pins = insertelement <4 x float> undef, float %ps, i32 0
1313  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1314  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
1315  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
1316  ret <4 x float> %s
1317}
1318
1319define <4 x double> @ceil_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
1320; CHECK-LABEL: ceil_v4f64_maskz_broadcast:
1321; CHECK:       ## %bb.0:
1322; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
1323; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1} {z}
1324; CHECK-NEXT:    retq
1325  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1326  %ps = load double, double* %ptr
1327  %pins = insertelement <4 x double> undef, double %ps, i32 0
1328  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1329  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
1330  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
1331  ret <4 x double> %s
1332}
1333
1334define <8 x float> @ceil_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
1335; CHECK-LABEL: ceil_v8f32_maskz_broadcast:
1336; CHECK:       ## %bb.0:
1337; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
1338; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1} {z}
1339; CHECK-NEXT:    retq
1340  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1341  %ps = load float, float* %ptr
1342  %pins = insertelement <8 x float> undef, float %ps, i32 0
1343  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1344  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
1345  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
1346  ret <8 x float> %s
1347}
1348
1349define <8 x double> @ceil_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
1350; CHECK-LABEL: ceil_v8f64_maskz_broadcast:
1351; CHECK:       ## %bb.0:
1352; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
1353; CHECK-NEXT:    vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1} {z}
1354; CHECK-NEXT:    retq
1355  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1356  %ps = load double, double* %ptr
1357  %pins = insertelement <8 x double> undef, double %ps, i32 0
1358  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1359  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
1360  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
1361  ret <8 x double> %s
1362}
1363
1364define <16 x float> @ceil_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
1365; CHECK-LABEL: ceil_v16f32_maskz_broadcast:
1366; CHECK:       ## %bb.0:
1367; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1368; CHECK-NEXT:    vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1} {z}
1369; CHECK-NEXT:    retq
1370  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1371  %ps = load float, float* %ptr
1372  %pins = insertelement <16 x float> undef, float %ps, i32 0
1373  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1374  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
1375  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
1376  ret <16 x float> %s
1377}
1378
1379define <2 x double> @trunc_v2f64(<2 x double> %p) {
1380; CHECK-LABEL: trunc_v2f64:
1381; CHECK:       ## %bb.0:
1382; CHECK-NEXT:    vroundpd $11, %xmm0, %xmm0
1383; CHECK-NEXT:    retq
1384  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1385  ret <2 x double> %t
1386}
1387
1388define <4 x float> @trunc_v4f32(<4 x float> %p) {
1389; CHECK-LABEL: trunc_v4f32:
1390; CHECK:       ## %bb.0:
1391; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
1392; CHECK-NEXT:    retq
1393  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1394  ret <4 x float> %t
1395}
1396
1397define <4 x double> @trunc_v4f64(<4 x double> %p){
1398; CHECK-LABEL: trunc_v4f64:
1399; CHECK:       ## %bb.0:
1400; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0
1401; CHECK-NEXT:    retq
1402  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1403  ret <4 x double> %t
1404}
1405
1406define <8 x float> @trunc_v8f32(<8 x float> %p) {
1407; CHECK-LABEL: trunc_v8f32:
1408; CHECK:       ## %bb.0:
1409; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0
1410; CHECK-NEXT:    retq
1411  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1412  ret <8 x float> %t
1413}
1414
1415define <8 x double> @trunc_v8f64(<8 x double> %p){
1416; CHECK-LABEL: trunc_v8f64:
1417; CHECK:       ## %bb.0:
1418; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
1419; CHECK-NEXT:    retq
1420  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1421  ret <8 x double> %t
1422}
1423
1424define <16 x float> @trunc_v16f32(<16 x float> %p) {
1425; CHECK-LABEL: trunc_v16f32:
1426; CHECK:       ## %bb.0:
1427; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
1428; CHECK-NEXT:    retq
1429  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1430  ret <16 x float> %t
1431}
1432
1433define <2 x double> @trunc_v2f64_load(<2 x double>* %ptr) {
1434; CHECK-LABEL: trunc_v2f64_load:
1435; CHECK:       ## %bb.0:
1436; CHECK-NEXT:    vroundpd $11, (%rdi), %xmm0
1437; CHECK-NEXT:    retq
1438  %p = load <2 x double>, <2 x double>* %ptr
1439  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1440  ret <2 x double> %t
1441}
1442
1443define <4 x float> @trunc_v4f32_load(<4 x float>* %ptr) {
1444; CHECK-LABEL: trunc_v4f32_load:
1445; CHECK:       ## %bb.0:
1446; CHECK-NEXT:    vroundps $11, (%rdi), %xmm0
1447; CHECK-NEXT:    retq
1448  %p = load <4 x float>, <4 x float>* %ptr
1449  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1450  ret <4 x float> %t
1451}
1452
1453define <4 x double> @trunc_v4f64_load(<4 x double>* %ptr){
1454; CHECK-LABEL: trunc_v4f64_load:
1455; CHECK:       ## %bb.0:
1456; CHECK-NEXT:    vroundpd $11, (%rdi), %ymm0
1457; CHECK-NEXT:    retq
1458  %p = load <4 x double>, <4 x double>* %ptr
1459  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1460  ret <4 x double> %t
1461}
1462
1463define <8 x float> @trunc_v8f32_load(<8 x float>* %ptr) {
1464; CHECK-LABEL: trunc_v8f32_load:
1465; CHECK:       ## %bb.0:
1466; CHECK-NEXT:    vroundps $11, (%rdi), %ymm0
1467; CHECK-NEXT:    retq
1468  %p = load <8 x float>, <8 x float>* %ptr
1469  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1470  ret <8 x float> %t
1471}
1472
1473define <8 x double> @trunc_v8f64_load(<8 x double>* %ptr){
1474; CHECK-LABEL: trunc_v8f64_load:
1475; CHECK:       ## %bb.0:
1476; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0
1477; CHECK-NEXT:    retq
1478  %p = load <8 x double>, <8 x double>* %ptr
1479  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1480  ret <8 x double> %t
1481}
1482
1483define <16 x float> @trunc_v16f32_load(<16 x float>* %ptr) {
1484; CHECK-LABEL: trunc_v16f32_load:
1485; CHECK:       ## %bb.0:
1486; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0
1487; CHECK-NEXT:    retq
1488  %p = load <16 x float>, <16 x float>* %ptr
1489  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1490  ret <16 x float> %t
1491}
1492
1493define <2 x double> @trunc_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
1494; CHECK-LABEL: trunc_v2f64_mask:
1495; CHECK:       ## %bb.0:
1496; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
1497; CHECK-NEXT:    vrndscalepd $11, %xmm0, %xmm1 {%k1}
1498; CHECK-NEXT:    vmovapd %xmm1, %xmm0
1499; CHECK-NEXT:    retq
1500  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1501  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1502  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
1503  ret <2 x double> %s
1504}
1505
1506define <4 x float> @trunc_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
1507; CHECK-LABEL: trunc_v4f32_mask:
1508; CHECK:       ## %bb.0:
1509; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
1510; CHECK-NEXT:    vrndscaleps $11, %xmm0, %xmm1 {%k1}
1511; CHECK-NEXT:    vmovaps %xmm1, %xmm0
1512; CHECK-NEXT:    retq
1513  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1514  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1515  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
1516  ret <4 x float> %s
1517}
1518
1519define <4 x double> @trunc_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
1520; CHECK-LABEL: trunc_v4f64_mask:
1521; CHECK:       ## %bb.0:
1522; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
1523; CHECK-NEXT:    vrndscalepd $11, %ymm0, %ymm1 {%k1}
1524; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1525; CHECK-NEXT:    retq
1526  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1527  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1528  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
1529  ret <4 x double> %s
1530}
1531
1532define <8 x float> @trunc_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
1533; CHECK-LABEL: trunc_v8f32_mask:
1534; CHECK:       ## %bb.0:
1535; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
1536; CHECK-NEXT:    vrndscaleps $11, %ymm0, %ymm1 {%k1}
1537; CHECK-NEXT:    vmovaps %ymm1, %ymm0
1538; CHECK-NEXT:    retq
1539  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1540  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1541  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
1542  ret <8 x float> %s
1543}
1544
1545define <8 x double> @trunc_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
1546; CHECK-LABEL: trunc_v8f64_mask:
1547; CHECK:       ## %bb.0:
1548; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
1549; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm1 {%k1}
1550; CHECK-NEXT:    vmovapd %zmm1, %zmm0
1551; CHECK-NEXT:    retq
1552  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1553  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1554  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
1555  ret <8 x double> %s
1556}
1557
1558define <16 x float> @trunc_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
1559; CHECK-LABEL: trunc_v16f32_mask:
1560; CHECK:       ## %bb.0:
1561; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
1562; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm1 {%k1}
1563; CHECK-NEXT:    vmovaps %zmm1, %zmm0
1564; CHECK-NEXT:    retq
1565  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1566  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1567  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
1568  ret <16 x float> %s
1569}
1570
1571define <2 x double> @trunc_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
1572; CHECK-LABEL: trunc_v2f64_maskz:
1573; CHECK:       ## %bb.0:
1574; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
1575; CHECK-NEXT:    vrndscalepd $11, %xmm0, %xmm0 {%k1} {z}
1576; CHECK-NEXT:    retq
1577  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1578  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1579  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
1580  ret <2 x double> %s
1581}
1582
1583define <4 x float> @trunc_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
1584; CHECK-LABEL: trunc_v4f32_maskz:
1585; CHECK:       ## %bb.0:
1586; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
1587; CHECK-NEXT:    vrndscaleps $11, %xmm0, %xmm0 {%k1} {z}
1588; CHECK-NEXT:    retq
1589  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1590  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1591  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
1592  ret <4 x float> %s
1593}
1594
1595define <4 x double> @trunc_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
1596; CHECK-LABEL: trunc_v4f64_maskz:
1597; CHECK:       ## %bb.0:
1598; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
1599; CHECK-NEXT:    vrndscalepd $11, %ymm0, %ymm0 {%k1} {z}
1600; CHECK-NEXT:    retq
1601  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1602  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1603  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
1604  ret <4 x double> %s
1605}
1606
1607define <8 x float> @trunc_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
1608; CHECK-LABEL: trunc_v8f32_maskz:
1609; CHECK:       ## %bb.0:
1610; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
1611; CHECK-NEXT:    vrndscaleps $11, %ymm0, %ymm0 {%k1} {z}
1612; CHECK-NEXT:    retq
1613  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1614  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1615  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
1616  ret <8 x float> %s
1617}
1618
1619define <8 x double> @trunc_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
1620; CHECK-LABEL: trunc_v8f64_maskz:
1621; CHECK:       ## %bb.0:
1622; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1623; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0 {%k1} {z}
1624; CHECK-NEXT:    retq
1625  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1626  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1627  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
1628  ret <8 x double> %s
1629}
1630
1631define <16 x float> @trunc_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
1632; CHECK-LABEL: trunc_v16f32_maskz:
1633; CHECK:       ## %bb.0:
1634; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1635; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0 {%k1} {z}
1636; CHECK-NEXT:    retq
1637  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1638  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1639  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
1640  ret <16 x float> %s
1641}
1642
1643define <2 x double> @trunc_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
1644; CHECK-LABEL: trunc_v2f64_mask_load:
1645; CHECK:       ## %bb.0:
1646; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
1647; CHECK-NEXT:    vrndscalepd $11, (%rdi), %xmm0 {%k1}
1648; CHECK-NEXT:    retq
1649  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1650  %p = load <2 x double>, <2 x double>* %ptr
1651  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1652  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
1653  ret <2 x double> %s
1654}
1655
1656define <4 x float> @trunc_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
1657; CHECK-LABEL: trunc_v4f32_mask_load:
1658; CHECK:       ## %bb.0:
1659; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
1660; CHECK-NEXT:    vrndscaleps $11, (%rdi), %xmm0 {%k1}
1661; CHECK-NEXT:    retq
1662  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1663  %p = load <4 x float>, <4 x float>* %ptr
1664  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1665  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
1666  ret <4 x float> %s
1667}
1668
1669define <4 x double> @trunc_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
1670; CHECK-LABEL: trunc_v4f64_mask_load:
1671; CHECK:       ## %bb.0:
1672; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
1673; CHECK-NEXT:    vrndscalepd $11, (%rdi), %ymm0 {%k1}
1674; CHECK-NEXT:    retq
1675  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1676  %p = load <4 x double>, <4 x double>* %ptr
1677  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1678  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
1679  ret <4 x double> %s
1680}
1681
1682define <8 x float> @trunc_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1683; CHECK-LABEL: trunc_v8f32_mask_load:
1684; CHECK:       ## %bb.0:
1685; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
1686; CHECK-NEXT:    vrndscaleps $11, (%rdi), %ymm0 {%k1}
1687; CHECK-NEXT:    retq
1688  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1689  %p = load <8 x float>, <8 x float>* %ptr
1690  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1691  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
1692  ret <8 x float> %s
1693}
1694
1695define <8 x double> @trunc_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1696; CHECK-LABEL: trunc_v8f64_mask_load:
1697; CHECK:       ## %bb.0:
1698; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1699; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0 {%k1}
1700; CHECK-NEXT:    retq
1701  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1702  %p = load <8 x double>, <8 x double>* %ptr
1703  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1704  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
1705  ret <8 x double> %s
1706}
1707
1708define <16 x float> @trunc_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1709; CHECK-LABEL: trunc_v16f32_mask_load:
1710; CHECK:       ## %bb.0:
1711; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1712; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0 {%k1}
1713; CHECK-NEXT:    retq
1714  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1715  %p = load <16 x float>, <16 x float>* %ptr
1716  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1717  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
1718  ret <16 x float> %s
1719}
1720
1721define <2 x double> @trunc_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
1722; CHECK-LABEL: trunc_v2f64_maskz_load:
1723; CHECK:       ## %bb.0:
1724; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
1725; CHECK-NEXT:    vrndscalepd $11, (%rdi), %xmm0 {%k1} {z}
1726; CHECK-NEXT:    retq
1727  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1728  %p = load <2 x double>, <2 x double>* %ptr
1729  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1730  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
1731  ret <2 x double> %s
1732}
1733
1734define <4 x float> @trunc_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
1735; CHECK-LABEL: trunc_v4f32_maskz_load:
1736; CHECK:       ## %bb.0:
1737; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
1738; CHECK-NEXT:    vrndscaleps $11, (%rdi), %xmm0 {%k1} {z}
1739; CHECK-NEXT:    retq
1740  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1741  %p = load <4 x float>, <4 x float>* %ptr
1742  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1743  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
1744  ret <4 x float> %s
1745}
1746
1747define <4 x double> @trunc_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
1748; CHECK-LABEL: trunc_v4f64_maskz_load:
1749; CHECK:       ## %bb.0:
1750; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
1751; CHECK-NEXT:    vrndscalepd $11, (%rdi), %ymm0 {%k1} {z}
1752; CHECK-NEXT:    retq
1753  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1754  %p = load <4 x double>, <4 x double>* %ptr
1755  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1756  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
1757  ret <4 x double> %s
1758}
1759
1760define <8 x float> @trunc_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
1761; CHECK-LABEL: trunc_v8f32_maskz_load:
1762; CHECK:       ## %bb.0:
1763; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
1764; CHECK-NEXT:    vrndscaleps $11, (%rdi), %ymm0 {%k1} {z}
1765; CHECK-NEXT:    retq
1766  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1767  %p = load <8 x float>, <8 x float>* %ptr
1768  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1769  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
1770  ret <8 x float> %s
1771}
1772
1773define <8 x double> @trunc_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
1774; CHECK-LABEL: trunc_v8f64_maskz_load:
1775; CHECK:       ## %bb.0:
1776; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
1777; CHECK-NEXT:    vrndscalepd $11, (%rdi), %zmm0 {%k1} {z}
1778; CHECK-NEXT:    retq
1779  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1780  %p = load <8 x double>, <8 x double>* %ptr
1781  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1782  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
1783  ret <8 x double> %s
1784}
1785
1786define <16 x float> @trunc_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
1787; CHECK-LABEL: trunc_v16f32_maskz_load:
1788; CHECK:       ## %bb.0:
1789; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1790; CHECK-NEXT:    vrndscaleps $11, (%rdi), %zmm0 {%k1} {z}
1791; CHECK-NEXT:    retq
1792  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1793  %p = load <16 x float>, <16 x float>* %ptr
1794  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1795  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
1796  ret <16 x float> %s
1797}
1798
1799define <2 x double> @trunc_v2f64_broadcast(double* %ptr) {
1800; CHECK-LABEL: trunc_v2f64_broadcast:
1801; CHECK:       ## %bb.0:
1802; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to2}, %xmm0
1803; CHECK-NEXT:    retq
1804  %ps = load double, double* %ptr
1805  %pins = insertelement <2 x double> undef, double %ps, i32 0
1806  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1807  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1808  ret <2 x double> %t
1809}
1810
1811define <4 x float> @trunc_v4f32_broadcast(float* %ptr) {
1812; CHECK-LABEL: trunc_v4f32_broadcast:
1813; CHECK:       ## %bb.0:
1814; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to4}, %xmm0
1815; CHECK-NEXT:    retq
1816  %ps = load float, float* %ptr
1817  %pins = insertelement <4 x float> undef, float %ps, i32 0
1818  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1819  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1820  ret <4 x float> %t
1821}
1822
1823define <4 x double> @trunc_v4f64_broadcast(double* %ptr){
1824; CHECK-LABEL: trunc_v4f64_broadcast:
1825; CHECK:       ## %bb.0:
1826; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to4}, %ymm0
1827; CHECK-NEXT:    retq
1828  %ps = load double, double* %ptr
1829  %pins = insertelement <4 x double> undef, double %ps, i32 0
1830  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1831  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1832  ret <4 x double> %t
1833}
1834
1835define <8 x float> @trunc_v8f32_broadcast(float* %ptr) {
1836; CHECK-LABEL: trunc_v8f32_broadcast:
1837; CHECK:       ## %bb.0:
1838; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to8}, %ymm0
1839; CHECK-NEXT:    retq
1840  %ps = load float, float* %ptr
1841  %pins = insertelement <8 x float> undef, float %ps, i32 0
1842  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1843  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1844  ret <8 x float> %t
1845}
1846
1847define <8 x double> @trunc_v8f64_broadcast(double* %ptr){
1848; CHECK-LABEL: trunc_v8f64_broadcast:
1849; CHECK:       ## %bb.0:
1850; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to8}, %zmm0
1851; CHECK-NEXT:    retq
1852  %ps = load double, double* %ptr
1853  %pins = insertelement <8 x double> undef, double %ps, i32 0
1854  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1855  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1856  ret <8 x double> %t
1857}
1858
1859define <16 x float> @trunc_v16f32_broadcast(float* %ptr) {
1860; CHECK-LABEL: trunc_v16f32_broadcast:
1861; CHECK:       ## %bb.0:
1862; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to16}, %zmm0
1863; CHECK-NEXT:    retq
1864  %ps = load float, float* %ptr
1865  %pins = insertelement <16 x float> undef, float %ps, i32 0
1866  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1867  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1868  ret <16 x float> %t
1869}
1870
1871define <2 x double> @trunc_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
1872; CHECK-LABEL: trunc_v2f64_mask_broadcast:
1873; CHECK:       ## %bb.0:
1874; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
1875; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1}
1876; CHECK-NEXT:    retq
1877  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1878  %ps = load double, double* %ptr
1879  %pins = insertelement <2 x double> undef, double %ps, i32 0
1880  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1881  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1882  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
1883  ret <2 x double> %s
1884}
1885
1886define <4 x float> @trunc_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
1887; CHECK-LABEL: trunc_v4f32_mask_broadcast:
1888; CHECK:       ## %bb.0:
1889; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
1890; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1}
1891; CHECK-NEXT:    retq
1892  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1893  %ps = load float, float* %ptr
1894  %pins = insertelement <4 x float> undef, float %ps, i32 0
1895  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1896  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1897  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
1898  ret <4 x float> %s
1899}
1900
1901define <4 x double> @trunc_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
1902; CHECK-LABEL: trunc_v4f64_mask_broadcast:
1903; CHECK:       ## %bb.0:
1904; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
1905; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1}
1906; CHECK-NEXT:    retq
1907  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1908  %ps = load double, double* %ptr
1909  %pins = insertelement <4 x double> undef, double %ps, i32 0
1910  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
1911  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
1912  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
1913  ret <4 x double> %s
1914}
1915
1916define <8 x float> @trunc_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
1917; CHECK-LABEL: trunc_v8f32_mask_broadcast:
1918; CHECK:       ## %bb.0:
1919; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
1920; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1}
1921; CHECK-NEXT:    retq
1922  %c = icmp eq <8 x i32> %cmp, zeroinitializer
1923  %ps = load float, float* %ptr
1924  %pins = insertelement <8 x float> undef, float %ps, i32 0
1925  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
1926  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
1927  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
1928  ret <8 x float> %s
1929}
1930
1931define <8 x double> @trunc_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
1932; CHECK-LABEL: trunc_v8f64_mask_broadcast:
1933; CHECK:       ## %bb.0:
1934; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
1935; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1}
1936; CHECK-NEXT:    retq
1937  %c = icmp eq <8 x i64> %cmp, zeroinitializer
1938  %ps = load double, double* %ptr
1939  %pins = insertelement <8 x double> undef, double %ps, i32 0
1940  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
1941  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
1942  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
1943  ret <8 x double> %s
1944}
1945
1946define <16 x float> @trunc_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
1947; CHECK-LABEL: trunc_v16f32_mask_broadcast:
1948; CHECK:       ## %bb.0:
1949; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
1950; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1}
1951; CHECK-NEXT:    retq
1952  %c = icmp eq <16 x i32> %cmp, zeroinitializer
1953  %ps = load float, float* %ptr
1954  %pins = insertelement <16 x float> undef, float %ps, i32 0
1955  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
1956  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
1957  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
1958  ret <16 x float> %s
1959}
1960
1961define <2 x double> @trunc_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
1962; CHECK-LABEL: trunc_v2f64_maskz_broadcast:
1963; CHECK:       ## %bb.0:
1964; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
1965; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1} {z}
1966; CHECK-NEXT:    retq
1967  %c = icmp eq <2 x i64> %cmp, zeroinitializer
1968  %ps = load double, double* %ptr
1969  %pins = insertelement <2 x double> undef, double %ps, i32 0
1970  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
1971  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
1972  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
1973  ret <2 x double> %s
1974}
1975
1976define <4 x float> @trunc_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
1977; CHECK-LABEL: trunc_v4f32_maskz_broadcast:
1978; CHECK:       ## %bb.0:
1979; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
1980; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1} {z}
1981; CHECK-NEXT:    retq
1982  %c = icmp eq <4 x i32> %cmp, zeroinitializer
1983  %ps = load float, float* %ptr
1984  %pins = insertelement <4 x float> undef, float %ps, i32 0
1985  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
1986  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
1987  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
1988  ret <4 x float> %s
1989}
1990
1991define <4 x double> @trunc_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
1992; CHECK-LABEL: trunc_v4f64_maskz_broadcast:
1993; CHECK:       ## %bb.0:
1994; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
1995; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1} {z}
1996; CHECK-NEXT:    retq
1997  %c = icmp eq <4 x i64> %cmp, zeroinitializer
1998  %ps = load double, double* %ptr
1999  %pins = insertelement <4 x double> undef, double %ps, i32 0
2000  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2001  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
2002  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
2003  ret <4 x double> %s
2004}
2005
2006define <8 x float> @trunc_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
2007; CHECK-LABEL: trunc_v8f32_maskz_broadcast:
2008; CHECK:       ## %bb.0:
2009; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
2010; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1} {z}
2011; CHECK-NEXT:    retq
2012  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2013  %ps = load float, float* %ptr
2014  %pins = insertelement <8 x float> undef, float %ps, i32 0
2015  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2016  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
2017  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
2018  ret <8 x float> %s
2019}
2020
2021define <8 x double> @trunc_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
2022; CHECK-LABEL: trunc_v8f64_maskz_broadcast:
2023; CHECK:       ## %bb.0:
2024; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2025; CHECK-NEXT:    vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1} {z}
2026; CHECK-NEXT:    retq
2027  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2028  %ps = load double, double* %ptr
2029  %pins = insertelement <8 x double> undef, double %ps, i32 0
2030  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2031  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
2032  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
2033  ret <8 x double> %s
2034}
2035
2036define <16 x float> @trunc_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
2037; CHECK-LABEL: trunc_v16f32_maskz_broadcast:
2038; CHECK:       ## %bb.0:
2039; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
2040; CHECK-NEXT:    vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1} {z}
2041; CHECK-NEXT:    retq
2042  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2043  %ps = load float, float* %ptr
2044  %pins = insertelement <16 x float> undef, float %ps, i32 0
2045  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2046  %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
2047  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
2048  ret <16 x float> %s
2049}
2050
2051define <2 x double> @rint_v2f64(<2 x double> %p) {
2052; CHECK-LABEL: rint_v2f64:
2053; CHECK:       ## %bb.0:
2054; CHECK-NEXT:    vroundpd $4, %xmm0, %xmm0
2055; CHECK-NEXT:    retq
2056  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2057  ret <2 x double> %t
2058}
2059
2060define <4 x float> @rint_v4f32(<4 x float> %p) {
2061; CHECK-LABEL: rint_v4f32:
2062; CHECK:       ## %bb.0:
2063; CHECK-NEXT:    vroundps $4, %xmm0, %xmm0
2064; CHECK-NEXT:    retq
2065  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2066  ret <4 x float> %t
2067}
2068
2069define <4 x double> @rint_v4f64(<4 x double> %p){
2070; CHECK-LABEL: rint_v4f64:
2071; CHECK:       ## %bb.0:
2072; CHECK-NEXT:    vroundpd $4, %ymm0, %ymm0
2073; CHECK-NEXT:    retq
2074  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2075  ret <4 x double> %t
2076}
2077
2078define <8 x float> @rint_v8f32(<8 x float> %p) {
2079; CHECK-LABEL: rint_v8f32:
2080; CHECK:       ## %bb.0:
2081; CHECK-NEXT:    vroundps $4, %ymm0, %ymm0
2082; CHECK-NEXT:    retq
2083  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2084  ret <8 x float> %t
2085}
2086
2087define <8 x double> @rint_v8f64(<8 x double> %p){
2088; CHECK-LABEL: rint_v8f64:
2089; CHECK:       ## %bb.0:
2090; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm0
2091; CHECK-NEXT:    retq
2092  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2093  ret <8 x double> %t
2094}
2095
2096define <16 x float> @rint_v16f32(<16 x float> %p) {
2097; CHECK-LABEL: rint_v16f32:
2098; CHECK:       ## %bb.0:
2099; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm0
2100; CHECK-NEXT:    retq
2101  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2102  ret <16 x float> %t
2103}
2104
2105define <2 x double> @rint_v2f64_load(<2 x double>* %ptr) {
2106; CHECK-LABEL: rint_v2f64_load:
2107; CHECK:       ## %bb.0:
2108; CHECK-NEXT:    vroundpd $4, (%rdi), %xmm0
2109; CHECK-NEXT:    retq
2110  %p = load <2 x double>, <2 x double>* %ptr
2111  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2112  ret <2 x double> %t
2113}
2114
2115define <4 x float> @rint_v4f32_load(<4 x float>* %ptr) {
2116; CHECK-LABEL: rint_v4f32_load:
2117; CHECK:       ## %bb.0:
2118; CHECK-NEXT:    vroundps $4, (%rdi), %xmm0
2119; CHECK-NEXT:    retq
2120  %p = load <4 x float>, <4 x float>* %ptr
2121  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2122  ret <4 x float> %t
2123}
2124
2125define <4 x double> @rint_v4f64_load(<4 x double>* %ptr){
2126; CHECK-LABEL: rint_v4f64_load:
2127; CHECK:       ## %bb.0:
2128; CHECK-NEXT:    vroundpd $4, (%rdi), %ymm0
2129; CHECK-NEXT:    retq
2130  %p = load <4 x double>, <4 x double>* %ptr
2131  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2132  ret <4 x double> %t
2133}
2134
2135define <8 x float> @rint_v8f32_load(<8 x float>* %ptr) {
2136; CHECK-LABEL: rint_v8f32_load:
2137; CHECK:       ## %bb.0:
2138; CHECK-NEXT:    vroundps $4, (%rdi), %ymm0
2139; CHECK-NEXT:    retq
2140  %p = load <8 x float>, <8 x float>* %ptr
2141  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2142  ret <8 x float> %t
2143}
2144
2145define <8 x double> @rint_v8f64_load(<8 x double>* %ptr){
2146; CHECK-LABEL: rint_v8f64_load:
2147; CHECK:       ## %bb.0:
2148; CHECK-NEXT:    vrndscalepd $4, (%rdi), %zmm0
2149; CHECK-NEXT:    retq
2150  %p = load <8 x double>, <8 x double>* %ptr
2151  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2152  ret <8 x double> %t
2153}
2154
2155define <16 x float> @rint_v16f32_load(<16 x float>* %ptr) {
2156; CHECK-LABEL: rint_v16f32_load:
2157; CHECK:       ## %bb.0:
2158; CHECK-NEXT:    vrndscaleps $4, (%rdi), %zmm0
2159; CHECK-NEXT:    retq
2160  %p = load <16 x float>, <16 x float>* %ptr
2161  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2162  ret <16 x float> %t
2163}
2164
2165define <2 x double> @rint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
2166; CHECK-LABEL: rint_v2f64_mask:
2167; CHECK:       ## %bb.0:
2168; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
2169; CHECK-NEXT:    vrndscalepd $4, %xmm0, %xmm1 {%k1}
2170; CHECK-NEXT:    vmovapd %xmm1, %xmm0
2171; CHECK-NEXT:    retq
2172  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2173  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2174  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
2175  ret <2 x double> %s
2176}
2177
2178define <4 x float> @rint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
2179; CHECK-LABEL: rint_v4f32_mask:
2180; CHECK:       ## %bb.0:
2181; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
2182; CHECK-NEXT:    vrndscaleps $4, %xmm0, %xmm1 {%k1}
2183; CHECK-NEXT:    vmovaps %xmm1, %xmm0
2184; CHECK-NEXT:    retq
2185  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2186  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2187  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
2188  ret <4 x float> %s
2189}
2190
2191define <4 x double> @rint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
2192; CHECK-LABEL: rint_v4f64_mask:
2193; CHECK:       ## %bb.0:
2194; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
2195; CHECK-NEXT:    vrndscalepd $4, %ymm0, %ymm1 {%k1}
2196; CHECK-NEXT:    vmovapd %ymm1, %ymm0
2197; CHECK-NEXT:    retq
2198  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2199  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2200  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
2201  ret <4 x double> %s
2202}
2203
2204define <8 x float> @rint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
2205; CHECK-LABEL: rint_v8f32_mask:
2206; CHECK:       ## %bb.0:
2207; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
2208; CHECK-NEXT:    vrndscaleps $4, %ymm0, %ymm1 {%k1}
2209; CHECK-NEXT:    vmovaps %ymm1, %ymm0
2210; CHECK-NEXT:    retq
2211  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2212  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2213  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
2214  ret <8 x float> %s
2215}
2216
2217define <8 x double> @rint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
2218; CHECK-LABEL: rint_v8f64_mask:
2219; CHECK:       ## %bb.0:
2220; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
2221; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm1 {%k1}
2222; CHECK-NEXT:    vmovapd %zmm1, %zmm0
2223; CHECK-NEXT:    retq
2224  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2225  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2226  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
2227  ret <8 x double> %s
2228}
2229
2230define <16 x float> @rint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
2231; CHECK-LABEL: rint_v16f32_mask:
2232; CHECK:       ## %bb.0:
2233; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
2234; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm1 {%k1}
2235; CHECK-NEXT:    vmovaps %zmm1, %zmm0
2236; CHECK-NEXT:    retq
2237  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2238  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2239  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
2240  ret <16 x float> %s
2241}
2242
2243define <2 x double> @rint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
2244; CHECK-LABEL: rint_v2f64_maskz:
2245; CHECK:       ## %bb.0:
2246; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2247; CHECK-NEXT:    vrndscalepd $4, %xmm0, %xmm0 {%k1} {z}
2248; CHECK-NEXT:    retq
2249  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2250  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2251  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
2252  ret <2 x double> %s
2253}
2254
2255define <4 x float> @rint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
2256; CHECK-LABEL: rint_v4f32_maskz:
2257; CHECK:       ## %bb.0:
2258; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
2259; CHECK-NEXT:    vrndscaleps $4, %xmm0, %xmm0 {%k1} {z}
2260; CHECK-NEXT:    retq
2261  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2262  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2263  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
2264  ret <4 x float> %s
2265}
2266
2267define <4 x double> @rint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
2268; CHECK-LABEL: rint_v4f64_maskz:
2269; CHECK:       ## %bb.0:
2270; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2271; CHECK-NEXT:    vrndscalepd $4, %ymm0, %ymm0 {%k1} {z}
2272; CHECK-NEXT:    retq
2273  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2274  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2275  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
2276  ret <4 x double> %s
2277}
2278
2279define <8 x float> @rint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
2280; CHECK-LABEL: rint_v8f32_maskz:
2281; CHECK:       ## %bb.0:
2282; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
2283; CHECK-NEXT:    vrndscaleps $4, %ymm0, %ymm0 {%k1} {z}
2284; CHECK-NEXT:    retq
2285  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2286  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2287  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
2288  ret <8 x float> %s
2289}
2290
2291define <8 x double> @rint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
2292; CHECK-LABEL: rint_v8f64_maskz:
2293; CHECK:       ## %bb.0:
2294; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2295; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm0 {%k1} {z}
2296; CHECK-NEXT:    retq
2297  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2298  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2299  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
2300  ret <8 x double> %s
2301}
2302
2303define <16 x float> @rint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
2304; CHECK-LABEL: rint_v16f32_maskz:
2305; CHECK:       ## %bb.0:
2306; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
2307; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm0 {%k1} {z}
2308; CHECK-NEXT:    retq
2309  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2310  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2311  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
2312  ret <16 x float> %s
2313}
2314
2315define <2 x double> @rint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
2316; CHECK-LABEL: rint_v2f64_mask_load:
2317; CHECK:       ## %bb.0:
2318; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2319; CHECK-NEXT:    vrndscalepd $4, (%rdi), %xmm0 {%k1}
2320; CHECK-NEXT:    retq
2321  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2322  %p = load <2 x double>, <2 x double>* %ptr
2323  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2324  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
2325  ret <2 x double> %s
2326}
2327
2328define <4 x float> @rint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
2329; CHECK-LABEL: rint_v4f32_mask_load:
2330; CHECK:       ## %bb.0:
2331; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
2332; CHECK-NEXT:    vrndscaleps $4, (%rdi), %xmm0 {%k1}
2333; CHECK-NEXT:    retq
2334  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2335  %p = load <4 x float>, <4 x float>* %ptr
2336  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2337  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
2338  ret <4 x float> %s
2339}
2340
2341define <4 x double> @rint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
2342; CHECK-LABEL: rint_v4f64_mask_load:
2343; CHECK:       ## %bb.0:
2344; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2345; CHECK-NEXT:    vrndscalepd $4, (%rdi), %ymm0 {%k1}
2346; CHECK-NEXT:    retq
2347  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2348  %p = load <4 x double>, <4 x double>* %ptr
2349  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2350  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
2351  ret <4 x double> %s
2352}
2353
2354define <8 x float> @rint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
2355; CHECK-LABEL: rint_v8f32_mask_load:
2356; CHECK:       ## %bb.0:
2357; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
2358; CHECK-NEXT:    vrndscaleps $4, (%rdi), %ymm0 {%k1}
2359; CHECK-NEXT:    retq
2360  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2361  %p = load <8 x float>, <8 x float>* %ptr
2362  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2363  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
2364  ret <8 x float> %s
2365}
2366
2367define <8 x double> @rint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
2368; CHECK-LABEL: rint_v8f64_mask_load:
2369; CHECK:       ## %bb.0:
2370; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2371; CHECK-NEXT:    vrndscalepd $4, (%rdi), %zmm0 {%k1}
2372; CHECK-NEXT:    retq
2373  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2374  %p = load <8 x double>, <8 x double>* %ptr
2375  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2376  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
2377  ret <8 x double> %s
2378}
2379
2380define <16 x float> @rint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
2381; CHECK-LABEL: rint_v16f32_mask_load:
2382; CHECK:       ## %bb.0:
2383; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
2384; CHECK-NEXT:    vrndscaleps $4, (%rdi), %zmm0 {%k1}
2385; CHECK-NEXT:    retq
2386  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2387  %p = load <16 x float>, <16 x float>* %ptr
2388  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2389  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
2390  ret <16 x float> %s
2391}
2392
2393define <2 x double> @rint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
2394; CHECK-LABEL: rint_v2f64_maskz_load:
2395; CHECK:       ## %bb.0:
2396; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
2397; CHECK-NEXT:    vrndscalepd $4, (%rdi), %xmm0 {%k1} {z}
2398; CHECK-NEXT:    retq
2399  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2400  %p = load <2 x double>, <2 x double>* %ptr
2401  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2402  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
2403  ret <2 x double> %s
2404}
2405
2406define <4 x float> @rint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
2407; CHECK-LABEL: rint_v4f32_maskz_load:
2408; CHECK:       ## %bb.0:
2409; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
2410; CHECK-NEXT:    vrndscaleps $4, (%rdi), %xmm0 {%k1} {z}
2411; CHECK-NEXT:    retq
2412  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2413  %p = load <4 x float>, <4 x float>* %ptr
2414  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2415  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
2416  ret <4 x float> %s
2417}
2418
2419define <4 x double> @rint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
2420; CHECK-LABEL: rint_v4f64_maskz_load:
2421; CHECK:       ## %bb.0:
2422; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
2423; CHECK-NEXT:    vrndscalepd $4, (%rdi), %ymm0 {%k1} {z}
2424; CHECK-NEXT:    retq
2425  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2426  %p = load <4 x double>, <4 x double>* %ptr
2427  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2428  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
2429  ret <4 x double> %s
2430}
2431
2432define <8 x float> @rint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
2433; CHECK-LABEL: rint_v8f32_maskz_load:
2434; CHECK:       ## %bb.0:
2435; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
2436; CHECK-NEXT:    vrndscaleps $4, (%rdi), %ymm0 {%k1} {z}
2437; CHECK-NEXT:    retq
2438  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2439  %p = load <8 x float>, <8 x float>* %ptr
2440  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2441  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
2442  ret <8 x float> %s
2443}
2444
2445define <8 x double> @rint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
2446; CHECK-LABEL: rint_v8f64_maskz_load:
2447; CHECK:       ## %bb.0:
2448; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2449; CHECK-NEXT:    vrndscalepd $4, (%rdi), %zmm0 {%k1} {z}
2450; CHECK-NEXT:    retq
2451  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2452  %p = load <8 x double>, <8 x double>* %ptr
2453  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2454  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
2455  ret <8 x double> %s
2456}
2457
2458define <16 x float> @rint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
2459; CHECK-LABEL: rint_v16f32_maskz_load:
2460; CHECK:       ## %bb.0:
2461; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
2462; CHECK-NEXT:    vrndscaleps $4, (%rdi), %zmm0 {%k1} {z}
2463; CHECK-NEXT:    retq
2464  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2465  %p = load <16 x float>, <16 x float>* %ptr
2466  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2467  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
2468  ret <16 x float> %s
2469}
2470
2471define <2 x double> @rint_v2f64_broadcast(double* %ptr) {
2472; CHECK-LABEL: rint_v2f64_broadcast:
2473; CHECK:       ## %bb.0:
2474; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to2}, %xmm0
2475; CHECK-NEXT:    retq
2476  %ps = load double, double* %ptr
2477  %pins = insertelement <2 x double> undef, double %ps, i32 0
2478  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
2479  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2480  ret <2 x double> %t
2481}
2482
2483define <4 x float> @rint_v4f32_broadcast(float* %ptr) {
2484; CHECK-LABEL: rint_v4f32_broadcast:
2485; CHECK:       ## %bb.0:
2486; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to4}, %xmm0
2487; CHECK-NEXT:    retq
2488  %ps = load float, float* %ptr
2489  %pins = insertelement <4 x float> undef, float %ps, i32 0
2490  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
2491  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2492  ret <4 x float> %t
2493}
2494
2495define <4 x double> @rint_v4f64_broadcast(double* %ptr){
2496; CHECK-LABEL: rint_v4f64_broadcast:
2497; CHECK:       ## %bb.0:
2498; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0
2499; CHECK-NEXT:    retq
2500  %ps = load double, double* %ptr
2501  %pins = insertelement <4 x double> undef, double %ps, i32 0
2502  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2503  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2504  ret <4 x double> %t
2505}
2506
2507define <8 x float> @rint_v8f32_broadcast(float* %ptr) {
2508; CHECK-LABEL: rint_v8f32_broadcast:
2509; CHECK:       ## %bb.0:
2510; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0
2511; CHECK-NEXT:    retq
2512  %ps = load float, float* %ptr
2513  %pins = insertelement <8 x float> undef, float %ps, i32 0
2514  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2515  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2516  ret <8 x float> %t
2517}
2518
2519define <8 x double> @rint_v8f64_broadcast(double* %ptr){
2520; CHECK-LABEL: rint_v8f64_broadcast:
2521; CHECK:       ## %bb.0:
2522; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0
2523; CHECK-NEXT:    retq
2524  %ps = load double, double* %ptr
2525  %pins = insertelement <8 x double> undef, double %ps, i32 0
2526  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2527  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2528  ret <8 x double> %t
2529}
2530
2531define <16 x float> @rint_v16f32_broadcast(float* %ptr) {
2532; CHECK-LABEL: rint_v16f32_broadcast:
2533; CHECK:       ## %bb.0:
2534; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0
2535; CHECK-NEXT:    retq
2536  %ps = load float, float* %ptr
2537  %pins = insertelement <16 x float> undef, float %ps, i32 0
2538  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2539  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2540  ret <16 x float> %t
2541}
2542
2543define <2 x double> @rint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
2544; CHECK-LABEL: rint_v2f64_mask_broadcast:
2545; CHECK:       ## %bb.0:
2546; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2547; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1}
2548; CHECK-NEXT:    retq
2549  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2550  %ps = load double, double* %ptr
2551  %pins = insertelement <2 x double> undef, double %ps, i32 0
2552  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
2553  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2554  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
2555  ret <2 x double> %s
2556}
2557
2558define <4 x float> @rint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
2559; CHECK-LABEL: rint_v4f32_mask_broadcast:
2560; CHECK:       ## %bb.0:
2561; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
2562; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1}
2563; CHECK-NEXT:    retq
2564  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2565  %ps = load float, float* %ptr
2566  %pins = insertelement <4 x float> undef, float %ps, i32 0
2567  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
2568  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2569  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
2570  ret <4 x float> %s
2571}
2572
2573define <4 x double> @rint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
2574; CHECK-LABEL: rint_v4f64_mask_broadcast:
2575; CHECK:       ## %bb.0:
2576; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2577; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1}
2578; CHECK-NEXT:    retq
2579  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2580  %ps = load double, double* %ptr
2581  %pins = insertelement <4 x double> undef, double %ps, i32 0
2582  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2583  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2584  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
2585  ret <4 x double> %s
2586}
2587
2588define <8 x float> @rint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
2589; CHECK-LABEL: rint_v8f32_mask_broadcast:
2590; CHECK:       ## %bb.0:
2591; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
2592; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1}
2593; CHECK-NEXT:    retq
2594  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2595  %ps = load float, float* %ptr
2596  %pins = insertelement <8 x float> undef, float %ps, i32 0
2597  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2598  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2599  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
2600  ret <8 x float> %s
2601}
2602
2603define <8 x double> @rint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
2604; CHECK-LABEL: rint_v8f64_mask_broadcast:
2605; CHECK:       ## %bb.0:
2606; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2607; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1}
2608; CHECK-NEXT:    retq
2609  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2610  %ps = load double, double* %ptr
2611  %pins = insertelement <8 x double> undef, double %ps, i32 0
2612  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2613  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2614  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
2615  ret <8 x double> %s
2616}
2617
2618define <16 x float> @rint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
2619; CHECK-LABEL: rint_v16f32_mask_broadcast:
2620; CHECK:       ## %bb.0:
2621; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
2622; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1}
2623; CHECK-NEXT:    retq
2624  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2625  %ps = load float, float* %ptr
2626  %pins = insertelement <16 x float> undef, float %ps, i32 0
2627  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2628  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2629  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
2630  ret <16 x float> %s
2631}
2632
2633define <2 x double> @rint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
2634; CHECK-LABEL: rint_v2f64_maskz_broadcast:
2635; CHECK:       ## %bb.0:
2636; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
2637; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1} {z}
2638; CHECK-NEXT:    retq
2639  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2640  %ps = load double, double* %ptr
2641  %pins = insertelement <2 x double> undef, double %ps, i32 0
2642  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
2643  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
2644  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
2645  ret <2 x double> %s
2646}
2647
2648define <4 x float> @rint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
2649; CHECK-LABEL: rint_v4f32_maskz_broadcast:
2650; CHECK:       ## %bb.0:
2651; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
2652; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1} {z}
2653; CHECK-NEXT:    retq
2654  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2655  %ps = load float, float* %ptr
2656  %pins = insertelement <4 x float> undef, float %ps, i32 0
2657  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
2658  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
2659  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
2660  ret <4 x float> %s
2661}
2662
2663define <4 x double> @rint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
2664; CHECK-LABEL: rint_v4f64_maskz_broadcast:
2665; CHECK:       ## %bb.0:
2666; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
2667; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} {z}
2668; CHECK-NEXT:    retq
2669  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2670  %ps = load double, double* %ptr
2671  %pins = insertelement <4 x double> undef, double %ps, i32 0
2672  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
2673  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
2674  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
2675  ret <4 x double> %s
2676}
2677
2678define <8 x float> @rint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
2679; CHECK-LABEL: rint_v8f32_maskz_broadcast:
2680; CHECK:       ## %bb.0:
2681; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
2682; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} {z}
2683; CHECK-NEXT:    retq
2684  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2685  %ps = load float, float* %ptr
2686  %pins = insertelement <8 x float> undef, float %ps, i32 0
2687  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
2688  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
2689  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
2690  ret <8 x float> %s
2691}
2692
2693define <8 x double> @rint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
2694; CHECK-LABEL: rint_v8f64_maskz_broadcast:
2695; CHECK:       ## %bb.0:
2696; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2697; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} {z}
2698; CHECK-NEXT:    retq
2699  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2700  %ps = load double, double* %ptr
2701  %pins = insertelement <8 x double> undef, double %ps, i32 0
2702  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
2703  %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
2704  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
2705  ret <8 x double> %s
2706}
2707
2708define <16 x float> @rint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
2709; CHECK-LABEL: rint_v16f32_maskz_broadcast:
2710; CHECK:       ## %bb.0:
2711; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
2712; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} {z}
2713; CHECK-NEXT:    retq
2714  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2715  %ps = load float, float* %ptr
2716  %pins = insertelement <16 x float> undef, float %ps, i32 0
2717  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
2718  %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
2719  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
2720  ret <16 x float> %s
2721}
2722
2723define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
2724; CHECK-LABEL: nearbyint_v2f64:
2725; CHECK:       ## %bb.0:
2726; CHECK-NEXT:    vroundpd $12, %xmm0, %xmm0
2727; CHECK-NEXT:    retq
2728  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2729  ret <2 x double> %t
2730}
2731
2732define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
2733; CHECK-LABEL: nearbyint_v4f32:
2734; CHECK:       ## %bb.0:
2735; CHECK-NEXT:    vroundps $12, %xmm0, %xmm0
2736; CHECK-NEXT:    retq
2737  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
2738  ret <4 x float> %t
2739}
2740
2741define <4 x double> @nearbyint_v4f64(<4 x double> %p){
2742; CHECK-LABEL: nearbyint_v4f64:
2743; CHECK:       ## %bb.0:
2744; CHECK-NEXT:    vroundpd $12, %ymm0, %ymm0
2745; CHECK-NEXT:    retq
2746  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
2747  ret <4 x double> %t
2748}
2749
2750define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
2751; CHECK-LABEL: nearbyint_v8f32:
2752; CHECK:       ## %bb.0:
2753; CHECK-NEXT:    vroundps $12, %ymm0, %ymm0
2754; CHECK-NEXT:    retq
2755  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
2756  ret <8 x float> %t
2757}
2758
2759define <8 x double> @nearbyint_v8f64(<8 x double> %p){
2760; CHECK-LABEL: nearbyint_v8f64:
2761; CHECK:       ## %bb.0:
2762; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm0
2763; CHECK-NEXT:    retq
2764  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
2765  ret <8 x double> %t
2766}
2767
2768define <16 x float> @nearbyint_v16f32(<16 x float> %p) {
2769; CHECK-LABEL: nearbyint_v16f32:
2770; CHECK:       ## %bb.0:
2771; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm0
2772; CHECK-NEXT:    retq
2773  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
2774  ret <16 x float> %t
2775}
2776
2777define <2 x double> @nearbyint_v2f64_load(<2 x double>* %ptr) {
2778; CHECK-LABEL: nearbyint_v2f64_load:
2779; CHECK:       ## %bb.0:
2780; CHECK-NEXT:    vroundpd $12, (%rdi), %xmm0
2781; CHECK-NEXT:    retq
2782  %p = load <2 x double>, <2 x double>* %ptr
2783  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2784  ret <2 x double> %t
2785}
2786
2787define <4 x float> @nearbyint_v4f32_load(<4 x float>* %ptr) {
2788; CHECK-LABEL: nearbyint_v4f32_load:
2789; CHECK:       ## %bb.0:
2790; CHECK-NEXT:    vroundps $12, (%rdi), %xmm0
2791; CHECK-NEXT:    retq
2792  %p = load <4 x float>, <4 x float>* %ptr
2793  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
2794  ret <4 x float> %t
2795}
2796
2797define <4 x double> @nearbyint_v4f64_load(<4 x double>* %ptr){
2798; CHECK-LABEL: nearbyint_v4f64_load:
2799; CHECK:       ## %bb.0:
2800; CHECK-NEXT:    vroundpd $12, (%rdi), %ymm0
2801; CHECK-NEXT:    retq
2802  %p = load <4 x double>, <4 x double>* %ptr
2803  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
2804  ret <4 x double> %t
2805}
2806
2807define <8 x float> @nearbyint_v8f32_load(<8 x float>* %ptr) {
2808; CHECK-LABEL: nearbyint_v8f32_load:
2809; CHECK:       ## %bb.0:
2810; CHECK-NEXT:    vroundps $12, (%rdi), %ymm0
2811; CHECK-NEXT:    retq
2812  %p = load <8 x float>, <8 x float>* %ptr
2813  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
2814  ret <8 x float> %t
2815}
2816
2817define <8 x double> @nearbyint_v8f64_load(<8 x double>* %ptr){
2818; CHECK-LABEL: nearbyint_v8f64_load:
2819; CHECK:       ## %bb.0:
2820; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0
2821; CHECK-NEXT:    retq
2822  %p = load <8 x double>, <8 x double>* %ptr
2823  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
2824  ret <8 x double> %t
2825}
2826
2827define <16 x float> @nearbyint_v16f32_load(<16 x float>* %ptr) {
2828; CHECK-LABEL: nearbyint_v16f32_load:
2829; CHECK:       ## %bb.0:
2830; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0
2831; CHECK-NEXT:    retq
2832  %p = load <16 x float>, <16 x float>* %ptr
2833  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
2834  ret <16 x float> %t
2835}
2836
2837define <2 x double> @nearbyint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
2838; CHECK-LABEL: nearbyint_v2f64_mask:
2839; CHECK:       ## %bb.0:
2840; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
2841; CHECK-NEXT:    vrndscalepd $12, %xmm0, %xmm1 {%k1}
2842; CHECK-NEXT:    vmovapd %xmm1, %xmm0
2843; CHECK-NEXT:    retq
2844  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2845  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2846  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
2847  ret <2 x double> %s
2848}
2849
2850define <4 x float> @nearbyint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
2851; CHECK-LABEL: nearbyint_v4f32_mask:
2852; CHECK:       ## %bb.0:
2853; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
2854; CHECK-NEXT:    vrndscaleps $12, %xmm0, %xmm1 {%k1}
2855; CHECK-NEXT:    vmovaps %xmm1, %xmm0
2856; CHECK-NEXT:    retq
2857  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2858  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
2859  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
2860  ret <4 x float> %s
2861}
2862
2863define <4 x double> @nearbyint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
2864; CHECK-LABEL: nearbyint_v4f64_mask:
2865; CHECK:       ## %bb.0:
2866; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
2867; CHECK-NEXT:    vrndscalepd $12, %ymm0, %ymm1 {%k1}
2868; CHECK-NEXT:    vmovapd %ymm1, %ymm0
2869; CHECK-NEXT:    retq
2870  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2871  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
2872  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
2873  ret <4 x double> %s
2874}
2875
2876define <8 x float> @nearbyint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
2877; CHECK-LABEL: nearbyint_v8f32_mask:
2878; CHECK:       ## %bb.0:
2879; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
2880; CHECK-NEXT:    vrndscaleps $12, %ymm0, %ymm1 {%k1}
2881; CHECK-NEXT:    vmovaps %ymm1, %ymm0
2882; CHECK-NEXT:    retq
2883  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2884  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
2885  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
2886  ret <8 x float> %s
2887}
2888
2889define <8 x double> @nearbyint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
2890; CHECK-LABEL: nearbyint_v8f64_mask:
2891; CHECK:       ## %bb.0:
2892; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
2893; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm1 {%k1}
2894; CHECK-NEXT:    vmovapd %zmm1, %zmm0
2895; CHECK-NEXT:    retq
2896  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2897  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
2898  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
2899  ret <8 x double> %s
2900}
2901
2902define <16 x float> @nearbyint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
2903; CHECK-LABEL: nearbyint_v16f32_mask:
2904; CHECK:       ## %bb.0:
2905; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
2906; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm1 {%k1}
2907; CHECK-NEXT:    vmovaps %zmm1, %zmm0
2908; CHECK-NEXT:    retq
2909  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2910  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
2911  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
2912  ret <16 x float> %s
2913}
2914
2915define <2 x double> @nearbyint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
2916; CHECK-LABEL: nearbyint_v2f64_maskz:
2917; CHECK:       ## %bb.0:
2918; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2919; CHECK-NEXT:    vrndscalepd $12, %xmm0, %xmm0 {%k1} {z}
2920; CHECK-NEXT:    retq
2921  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2922  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2923  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
2924  ret <2 x double> %s
2925}
2926
2927define <4 x float> @nearbyint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
2928; CHECK-LABEL: nearbyint_v4f32_maskz:
2929; CHECK:       ## %bb.0:
2930; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
2931; CHECK-NEXT:    vrndscaleps $12, %xmm0, %xmm0 {%k1} {z}
2932; CHECK-NEXT:    retq
2933  %c = icmp eq <4 x i32> %cmp, zeroinitializer
2934  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
2935  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
2936  ret <4 x float> %s
2937}
2938
2939define <4 x double> @nearbyint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
2940; CHECK-LABEL: nearbyint_v4f64_maskz:
2941; CHECK:       ## %bb.0:
2942; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2943; CHECK-NEXT:    vrndscalepd $12, %ymm0, %ymm0 {%k1} {z}
2944; CHECK-NEXT:    retq
2945  %c = icmp eq <4 x i64> %cmp, zeroinitializer
2946  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
2947  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
2948  ret <4 x double> %s
2949}
2950
2951define <8 x float> @nearbyint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
2952; CHECK-LABEL: nearbyint_v8f32_maskz:
2953; CHECK:       ## %bb.0:
2954; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
2955; CHECK-NEXT:    vrndscaleps $12, %ymm0, %ymm0 {%k1} {z}
2956; CHECK-NEXT:    retq
2957  %c = icmp eq <8 x i32> %cmp, zeroinitializer
2958  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
2959  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
2960  ret <8 x float> %s
2961}
2962
2963define <8 x double> @nearbyint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
2964; CHECK-LABEL: nearbyint_v8f64_maskz:
2965; CHECK:       ## %bb.0:
2966; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2967; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm0 {%k1} {z}
2968; CHECK-NEXT:    retq
2969  %c = icmp eq <8 x i64> %cmp, zeroinitializer
2970  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
2971  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
2972  ret <8 x double> %s
2973}
2974
2975define <16 x float> @nearbyint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
2976; CHECK-LABEL: nearbyint_v16f32_maskz:
2977; CHECK:       ## %bb.0:
2978; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
2979; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm0 {%k1} {z}
2980; CHECK-NEXT:    retq
2981  %c = icmp eq <16 x i32> %cmp, zeroinitializer
2982  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
2983  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
2984  ret <16 x float> %s
2985}
2986
2987define <2 x double> @nearbyint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
2988; CHECK-LABEL: nearbyint_v2f64_mask_load:
2989; CHECK:       ## %bb.0:
2990; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2991; CHECK-NEXT:    vrndscalepd $12, (%rdi), %xmm0 {%k1}
2992; CHECK-NEXT:    retq
2993  %c = icmp eq <2 x i64> %cmp, zeroinitializer
2994  %p = load <2 x double>, <2 x double>* %ptr
2995  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
2996  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
2997  ret <2 x double> %s
2998}
2999
3000define <4 x float> @nearbyint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
3001; CHECK-LABEL: nearbyint_v4f32_mask_load:
3002; CHECK:       ## %bb.0:
3003; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
3004; CHECK-NEXT:    vrndscaleps $12, (%rdi), %xmm0 {%k1}
3005; CHECK-NEXT:    retq
3006  %c = icmp eq <4 x i32> %cmp, zeroinitializer
3007  %p = load <4 x float>, <4 x float>* %ptr
3008  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3009  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
3010  ret <4 x float> %s
3011}
3012
3013define <4 x double> @nearbyint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
3014; CHECK-LABEL: nearbyint_v4f64_mask_load:
3015; CHECK:       ## %bb.0:
3016; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
3017; CHECK-NEXT:    vrndscalepd $12, (%rdi), %ymm0 {%k1}
3018; CHECK-NEXT:    retq
3019  %c = icmp eq <4 x i64> %cmp, zeroinitializer
3020  %p = load <4 x double>, <4 x double>* %ptr
3021  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3022  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
3023  ret <4 x double> %s
3024}
3025
3026define <8 x float> @nearbyint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
3027; CHECK-LABEL: nearbyint_v8f32_mask_load:
3028; CHECK:       ## %bb.0:
3029; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
3030; CHECK-NEXT:    vrndscaleps $12, (%rdi), %ymm0 {%k1}
3031; CHECK-NEXT:    retq
3032  %c = icmp eq <8 x i32> %cmp, zeroinitializer
3033  %p = load <8 x float>, <8 x float>* %ptr
3034  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3035  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
3036  ret <8 x float> %s
3037}
3038
3039define <8 x double> @nearbyint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
3040; CHECK-LABEL: nearbyint_v8f64_mask_load:
3041; CHECK:       ## %bb.0:
3042; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
3043; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0 {%k1}
3044; CHECK-NEXT:    retq
3045  %c = icmp eq <8 x i64> %cmp, zeroinitializer
3046  %p = load <8 x double>, <8 x double>* %ptr
3047  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3048  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
3049  ret <8 x double> %s
3050}
3051
3052define <16 x float> @nearbyint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
3053; CHECK-LABEL: nearbyint_v16f32_mask_load:
3054; CHECK:       ## %bb.0:
3055; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
3056; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0 {%k1}
3057; CHECK-NEXT:    retq
3058  %c = icmp eq <16 x i32> %cmp, zeroinitializer
3059  %p = load <16 x float>, <16 x float>* %ptr
3060  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3061  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
3062  ret <16 x float> %s
3063}
3064
3065define <2 x double> @nearbyint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
3066; CHECK-LABEL: nearbyint_v2f64_maskz_load:
3067; CHECK:       ## %bb.0:
3068; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
3069; CHECK-NEXT:    vrndscalepd $12, (%rdi), %xmm0 {%k1} {z}
3070; CHECK-NEXT:    retq
3071  %c = icmp eq <2 x i64> %cmp, zeroinitializer
3072  %p = load <2 x double>, <2 x double>* %ptr
3073  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3074  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
3075  ret <2 x double> %s
3076}
3077
3078define <4 x float> @nearbyint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
3079; CHECK-LABEL: nearbyint_v4f32_maskz_load:
3080; CHECK:       ## %bb.0:
3081; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
3082; CHECK-NEXT:    vrndscaleps $12, (%rdi), %xmm0 {%k1} {z}
3083; CHECK-NEXT:    retq
3084  %c = icmp eq <4 x i32> %cmp, zeroinitializer
3085  %p = load <4 x float>, <4 x float>* %ptr
3086  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3087  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
3088  ret <4 x float> %s
3089}
3090
3091define <4 x double> @nearbyint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
3092; CHECK-LABEL: nearbyint_v4f64_maskz_load:
3093; CHECK:       ## %bb.0:
3094; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
3095; CHECK-NEXT:    vrndscalepd $12, (%rdi), %ymm0 {%k1} {z}
3096; CHECK-NEXT:    retq
3097  %c = icmp eq <4 x i64> %cmp, zeroinitializer
3098  %p = load <4 x double>, <4 x double>* %ptr
3099  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3100  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
3101  ret <4 x double> %s
3102}
3103
3104define <8 x float> @nearbyint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
3105; CHECK-LABEL: nearbyint_v8f32_maskz_load:
3106; CHECK:       ## %bb.0:
3107; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
3108; CHECK-NEXT:    vrndscaleps $12, (%rdi), %ymm0 {%k1} {z}
3109; CHECK-NEXT:    retq
3110  %c = icmp eq <8 x i32> %cmp, zeroinitializer
3111  %p = load <8 x float>, <8 x float>* %ptr
3112  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3113  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
3114  ret <8 x float> %s
3115}
3116
3117define <8 x double> @nearbyint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
3118; CHECK-LABEL: nearbyint_v8f64_maskz_load:
3119; CHECK:       ## %bb.0:
3120; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
3121; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0 {%k1} {z}
3122; CHECK-NEXT:    retq
3123  %c = icmp eq <8 x i64> %cmp, zeroinitializer
3124  %p = load <8 x double>, <8 x double>* %ptr
3125  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3126  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
3127  ret <8 x double> %s
3128}
3129
3130define <16 x float> @nearbyint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
3131; CHECK-LABEL: nearbyint_v16f32_maskz_load:
3132; CHECK:       ## %bb.0:
3133; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
3134; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0 {%k1} {z}
3135; CHECK-NEXT:    retq
3136  %c = icmp eq <16 x i32> %cmp, zeroinitializer
3137  %p = load <16 x float>, <16 x float>* %ptr
3138  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3139  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
3140  ret <16 x float> %s
3141}
3142
3143define <2 x double> @nearbyint_v2f64_broadcast(double* %ptr) {
3144; CHECK-LABEL: nearbyint_v2f64_broadcast:
3145; CHECK:       ## %bb.0:
3146; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to2}, %xmm0
3147; CHECK-NEXT:    retq
3148  %ps = load double, double* %ptr
3149  %pins = insertelement <2 x double> undef, double %ps, i32 0
3150  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
3151  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3152  ret <2 x double> %t
3153}
3154
3155define <4 x float> @nearbyint_v4f32_broadcast(float* %ptr) {
3156; CHECK-LABEL: nearbyint_v4f32_broadcast:
3157; CHECK:       ## %bb.0:
3158; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to4}, %xmm0
3159; CHECK-NEXT:    retq
3160  %ps = load float, float* %ptr
3161  %pins = insertelement <4 x float> undef, float %ps, i32 0
3162  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
3163  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3164  ret <4 x float> %t
3165}
3166
3167define <4 x double> @nearbyint_v4f64_broadcast(double* %ptr){
3168; CHECK-LABEL: nearbyint_v4f64_broadcast:
3169; CHECK:       ## %bb.0:
3170; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to4}, %ymm0
3171; CHECK-NEXT:    retq
3172  %ps = load double, double* %ptr
3173  %pins = insertelement <4 x double> undef, double %ps, i32 0
3174  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
3175  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3176  ret <4 x double> %t
3177}
3178
3179define <8 x float> @nearbyint_v8f32_broadcast(float* %ptr) {
3180; CHECK-LABEL: nearbyint_v8f32_broadcast:
3181; CHECK:       ## %bb.0:
3182; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to8}, %ymm0
3183; CHECK-NEXT:    retq
3184  %ps = load float, float* %ptr
3185  %pins = insertelement <8 x float> undef, float %ps, i32 0
3186  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
3187  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3188  ret <8 x float> %t
3189}
3190
3191define <8 x double> @nearbyint_v8f64_broadcast(double* %ptr){
3192; CHECK-LABEL: nearbyint_v8f64_broadcast:
3193; CHECK:       ## %bb.0:
3194; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to8}, %zmm0
3195; CHECK-NEXT:    retq
3196  %ps = load double, double* %ptr
3197  %pins = insertelement <8 x double> undef, double %ps, i32 0
3198  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
3199  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3200  ret <8 x double> %t
3201}
3202
3203define <16 x float> @nearbyint_v16f32_broadcast(float* %ptr) {
3204; CHECK-LABEL: nearbyint_v16f32_broadcast:
3205; CHECK:       ## %bb.0:
3206; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to16}, %zmm0
3207; CHECK-NEXT:    retq
3208  %ps = load float, float* %ptr
3209  %pins = insertelement <16 x float> undef, float %ps, i32 0
3210  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
3211  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3212  ret <16 x float> %t
3213}
3214
3215define <2 x double> @nearbyint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
3216; CHECK-LABEL: nearbyint_v2f64_mask_broadcast:
3217; CHECK:       ## %bb.0:
3218; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
3219; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1}
3220; CHECK-NEXT:    retq
3221  %c = icmp eq <2 x i64> %cmp, zeroinitializer
3222  %ps = load double, double* %ptr
3223  %pins = insertelement <2 x double> undef, double %ps, i32 0
3224  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
3225  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3226  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
3227  ret <2 x double> %s
3228}
3229
3230define <4 x float> @nearbyint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
3231; CHECK-LABEL: nearbyint_v4f32_mask_broadcast:
3232; CHECK:       ## %bb.0:
3233; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
3234; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1}
3235; CHECK-NEXT:    retq
3236  %c = icmp eq <4 x i32> %cmp, zeroinitializer
3237  %ps = load float, float* %ptr
3238  %pins = insertelement <4 x float> undef, float %ps, i32 0
3239  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
3240  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3241  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
3242  ret <4 x float> %s
3243}
3244
3245define <4 x double> @nearbyint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
3246; CHECK-LABEL: nearbyint_v4f64_mask_broadcast:
3247; CHECK:       ## %bb.0:
3248; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
3249; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1}
3250; CHECK-NEXT:    retq
3251  %c = icmp eq <4 x i64> %cmp, zeroinitializer
3252  %ps = load double, double* %ptr
3253  %pins = insertelement <4 x double> undef, double %ps, i32 0
3254  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
3255  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3256  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
3257  ret <4 x double> %s
3258}
3259
3260define <8 x float> @nearbyint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
3261; CHECK-LABEL: nearbyint_v8f32_mask_broadcast:
3262; CHECK:       ## %bb.0:
3263; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
3264; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1}
3265; CHECK-NEXT:    retq
3266  %c = icmp eq <8 x i32> %cmp, zeroinitializer
3267  %ps = load float, float* %ptr
3268  %pins = insertelement <8 x float> undef, float %ps, i32 0
3269  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
3270  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3271  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
3272  ret <8 x float> %s
3273}
3274
3275define <8 x double> @nearbyint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
3276; CHECK-LABEL: nearbyint_v8f64_mask_broadcast:
3277; CHECK:       ## %bb.0:
3278; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
3279; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1}
3280; CHECK-NEXT:    retq
3281  %c = icmp eq <8 x i64> %cmp, zeroinitializer
3282  %ps = load double, double* %ptr
3283  %pins = insertelement <8 x double> undef, double %ps, i32 0
3284  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
3285  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3286  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
3287  ret <8 x double> %s
3288}
3289
3290define <16 x float> @nearbyint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
3291; CHECK-LABEL: nearbyint_v16f32_mask_broadcast:
3292; CHECK:       ## %bb.0:
3293; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
3294; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1}
3295; CHECK-NEXT:    retq
3296  %c = icmp eq <16 x i32> %cmp, zeroinitializer
3297  %ps = load float, float* %ptr
3298  %pins = insertelement <16 x float> undef, float %ps, i32 0
3299  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
3300  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3301  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
3302  ret <16 x float> %s
3303}
3304
3305define <2 x double> @nearbyint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
3306; CHECK-LABEL: nearbyint_v2f64_maskz_broadcast:
3307; CHECK:       ## %bb.0:
3308; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
3309; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1} {z}
3310; CHECK-NEXT:    retq
3311  %c = icmp eq <2 x i64> %cmp, zeroinitializer
3312  %ps = load double, double* %ptr
3313  %pins = insertelement <2 x double> undef, double %ps, i32 0
3314  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
3315  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
3316  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
3317  ret <2 x double> %s
3318}
3319
3320define <4 x float> @nearbyint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
3321; CHECK-LABEL: nearbyint_v4f32_maskz_broadcast:
3322; CHECK:       ## %bb.0:
3323; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
3324; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1} {z}
3325; CHECK-NEXT:    retq
3326  %c = icmp eq <4 x i32> %cmp, zeroinitializer
3327  %ps = load float, float* %ptr
3328  %pins = insertelement <4 x float> undef, float %ps, i32 0
3329  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
3330  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
3331  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
3332  ret <4 x float> %s
3333}
3334
3335define <4 x double> @nearbyint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
3336; CHECK-LABEL: nearbyint_v4f64_maskz_broadcast:
3337; CHECK:       ## %bb.0:
3338; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
3339; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1} {z}
3340; CHECK-NEXT:    retq
3341  %c = icmp eq <4 x i64> %cmp, zeroinitializer
3342  %ps = load double, double* %ptr
3343  %pins = insertelement <4 x double> undef, double %ps, i32 0
3344  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
3345  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
3346  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
3347  ret <4 x double> %s
3348}
3349
3350define <8 x float> @nearbyint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
3351; CHECK-LABEL: nearbyint_v8f32_maskz_broadcast:
3352; CHECK:       ## %bb.0:
3353; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
3354; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1} {z}
3355; CHECK-NEXT:    retq
3356  %c = icmp eq <8 x i32> %cmp, zeroinitializer
3357  %ps = load float, float* %ptr
3358  %pins = insertelement <8 x float> undef, float %ps, i32 0
3359  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
3360  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
3361  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
3362  ret <8 x float> %s
3363}
3364
3365define <8 x double> @nearbyint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
3366; CHECK-LABEL: nearbyint_v8f64_maskz_broadcast:
3367; CHECK:       ## %bb.0:
3368; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
3369; CHECK-NEXT:    vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1} {z}
3370; CHECK-NEXT:    retq
3371  %c = icmp eq <8 x i64> %cmp, zeroinitializer
3372  %ps = load double, double* %ptr
3373  %pins = insertelement <8 x double> undef, double %ps, i32 0
3374  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
3375  %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
3376  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
3377  ret <8 x double> %s
3378}
3379
3380define <16 x float> @nearbyint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
3381; CHECK-LABEL: nearbyint_v16f32_maskz_broadcast:
3382; CHECK:       ## %bb.0:
3383; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
3384; CHECK-NEXT:    vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1} {z}
3385; CHECK-NEXT:    retq
3386  %c = icmp eq <16 x i32> %cmp, zeroinitializer
3387  %ps = load float, float* %ptr
3388  %pins = insertelement <16 x float> undef, float %ps, i32 0
3389  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
3390  %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
3391  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
3392  ret <16 x float> %s
3393}
3394