1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
3
; Unmasked splat: %s is inserted into lane 0 of a 2-element vector and lane 0 is
; then shuffled into all 4 lanes of the <4 x double> result.
; Expected codegen is a single register-source vbroadcastsd into %ymm0.
4define <4 x double> @test_double_to_4(double %s) {
5; CHECK-LABEL: test_double_to_4:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
8; CHECK-NEXT:    retq
9  %vec = insertelement <2 x double> undef, double %s, i32 0
10  %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
11  ret <4 x double> %res
12}
; Merge-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; Expected codegen compares into %k1 and broadcasts with a {%k1} write mask.
13define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) {
14; CHECK-LABEL: test_masked_double_to_4_mask0:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
17; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
18; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
19; CHECK-NEXT:    vmovapd %ymm1, %ymm0
20; CHECK-NEXT:    retq
21  %vec = insertelement <2 x double> undef, double %s, i32 0
22  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
23  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
24  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
25  ret <4 x double> %res
26}
27
; Zero-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; Expected codegen compares into %k1 and broadcasts with {%k1} {z}.
28define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) {
29; CHECK-LABEL: test_masked_z_double_to_4_mask0:
30; CHECK:       # %bb.0:
31; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
32; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
33; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
34; CHECK-NEXT:    retq
35  %vec = insertelement <2 x double> undef, double %s, i32 0
36  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
37  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
38  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
39  ret <4 x double> %res
40}
; Merge-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
41define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) {
42; CHECK-LABEL: test_masked_double_to_4_mask1:
43; CHECK:       # %bb.0:
44; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
45; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
46; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
47; CHECK-NEXT:    vmovapd %ymm1, %ymm0
48; CHECK-NEXT:    retq
49  %vec = insertelement <2 x double> undef, double %s, i32 0
50  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
51  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
52  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
53  ret <4 x double> %res
54}
55
; Zero-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
56define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) {
57; CHECK-LABEL: test_masked_z_double_to_4_mask1:
58; CHECK:       # %bb.0:
59; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
60; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
61; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
62; CHECK-NEXT:    retq
63  %vec = insertelement <2 x double> undef, double %s, i32 0
64  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
65  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
66  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
67  ret <4 x double> %res
68}
; Merge-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
69define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) {
70; CHECK-LABEL: test_masked_double_to_4_mask2:
71; CHECK:       # %bb.0:
72; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
73; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
74; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
75; CHECK-NEXT:    vmovapd %ymm1, %ymm0
76; CHECK-NEXT:    retq
77  %vec = insertelement <2 x double> undef, double %s, i32 0
78  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
79  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
80  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
81  ret <4 x double> %res
82}
83
; Zero-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
84define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) {
85; CHECK-LABEL: test_masked_z_double_to_4_mask2:
86; CHECK:       # %bb.0:
87; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
88; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
89; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
90; CHECK-NEXT:    retq
91  %vec = insertelement <2 x double> undef, double %s, i32 0
92  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
93  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
94  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
95  ret <4 x double> %res
96}
; Merge-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
97define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) {
98; CHECK-LABEL: test_masked_double_to_4_mask3:
99; CHECK:       # %bb.0:
100; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
101; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
102; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
103; CHECK-NEXT:    vmovapd %ymm1, %ymm0
104; CHECK-NEXT:    retq
105  %vec = insertelement <2 x double> undef, double %s, i32 0
106  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
107  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
108  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
109  ret <4 x double> %res
110}
111
; Zero-masked splat to <4 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
112define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) {
113; CHECK-LABEL: test_masked_z_double_to_4_mask3:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
116; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
117; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
118; CHECK-NEXT:    retq
119  %vec = insertelement <2 x double> undef, double %s, i32 0
120  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
121  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
122  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
123  ret <4 x double> %res
124}
; Unmasked splat: %s is inserted into lane 0 of a 2-element vector and lane 0 is
; then shuffled into all 8 lanes of the <8 x double> result.
; Expected codegen is a single register-source vbroadcastsd into %zmm0.
125define <8 x double> @test_double_to_8(double %s) {
126; CHECK-LABEL: test_double_to_8:
127; CHECK:       # %bb.0:
128; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
129; CHECK-NEXT:    retq
130  %vec = insertelement <2 x double> undef, double %s, i32 0
131  %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
132  ret <8 x double> %res
133}
; Merge-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; Expected codegen compares into %k1 and broadcasts with a {%k1} write mask.
134define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) {
135; CHECK-LABEL: test_masked_double_to_8_mask0:
136; CHECK:       # %bb.0:
137; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
138; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
139; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
140; CHECK-NEXT:    vmovapd %zmm1, %zmm0
141; CHECK-NEXT:    retq
142  %vec = insertelement <2 x double> undef, double %s, i32 0
143  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
144  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
145  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
146  ret <8 x double> %res
147}
148
; Zero-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; Expected codegen compares into %k1 and broadcasts with {%k1} {z}.
149define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) {
150; CHECK-LABEL: test_masked_z_double_to_8_mask0:
151; CHECK:       # %bb.0:
152; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
153; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
154; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
155; CHECK-NEXT:    retq
156  %vec = insertelement <2 x double> undef, double %s, i32 0
157  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
158  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
159  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
160  ret <8 x double> %res
161}
; Merge-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
162define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) {
163; CHECK-LABEL: test_masked_double_to_8_mask1:
164; CHECK:       # %bb.0:
165; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
166; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
167; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
168; CHECK-NEXT:    vmovapd %zmm1, %zmm0
169; CHECK-NEXT:    retq
170  %vec = insertelement <2 x double> undef, double %s, i32 0
171  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
172  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
173  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
174  ret <8 x double> %res
175}
176
; Zero-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
177define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) {
178; CHECK-LABEL: test_masked_z_double_to_8_mask1:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
181; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
182; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
183; CHECK-NEXT:    retq
184  %vec = insertelement <2 x double> undef, double %s, i32 0
185  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
186  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
187  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
188  ret <8 x double> %res
189}
; Merge-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
190define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) {
191; CHECK-LABEL: test_masked_double_to_8_mask2:
192; CHECK:       # %bb.0:
193; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
194; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
195; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
196; CHECK-NEXT:    vmovapd %zmm1, %zmm0
197; CHECK-NEXT:    retq
198  %vec = insertelement <2 x double> undef, double %s, i32 0
199  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
200  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
201  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
202  ret <8 x double> %res
203}
204
; Zero-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
205define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) {
206; CHECK-LABEL: test_masked_z_double_to_8_mask2:
207; CHECK:       # %bb.0:
208; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
209; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
210; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
211; CHECK-NEXT:    retq
212  %vec = insertelement <2 x double> undef, double %s, i32 0
213  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
214  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
215  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
216  ret <8 x double> %res
217}
; Merge-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
218define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) {
219; CHECK-LABEL: test_masked_double_to_8_mask3:
220; CHECK:       # %bb.0:
221; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
222; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
223; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
224; CHECK-NEXT:    vmovapd %zmm1, %zmm0
225; CHECK-NEXT:    retq
226  %vec = insertelement <2 x double> undef, double %s, i32 0
227  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
228  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
229  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
230  ret <8 x double> %res
231}
232
; Zero-masked splat to <8 x double>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
233define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) {
234; CHECK-LABEL: test_masked_z_double_to_8_mask3:
235; CHECK:       # %bb.0:
236; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
237; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
238; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
239; CHECK-NEXT:    retq
240  %vec = insertelement <2 x double> undef, double %s, i32 0
241  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
242  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
243  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
244  ret <8 x double> %res
245}
; Unmasked splat: %s is inserted into lane 0 of a 2-element vector and lane 0 is
; then shuffled into all 4 lanes of the <4 x float> result.
; Expected codegen is a single register-source vbroadcastss into %xmm0.
246define <4 x float> @test_float_to_4(float %s) {
247; CHECK-LABEL: test_float_to_4:
248; CHECK:       # %bb.0:
249; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
250; CHECK-NEXT:    retq
251  %vec = insertelement <2 x float> undef, float %s, i32 0
252  %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
253  ret <4 x float> %res
254}
; Merge-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; Expected codegen compares into %k1 and broadcasts with a {%k1} write mask.
255define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) {
256; CHECK-LABEL: test_masked_float_to_4_mask0:
257; CHECK:       # %bb.0:
258; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
259; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
260; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
261; CHECK-NEXT:    vmovaps %xmm1, %xmm0
262; CHECK-NEXT:    retq
263  %vec = insertelement <2 x float> undef, float %s, i32 0
264  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
265  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
266  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
267  ret <4 x float> %res
268}
269
; Zero-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; Expected codegen compares into %k1 and broadcasts with {%k1} {z}.
270define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) {
271; CHECK-LABEL: test_masked_z_float_to_4_mask0:
272; CHECK:       # %bb.0:
273; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
274; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
275; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
276; CHECK-NEXT:    retq
277  %vec = insertelement <2 x float> undef, float %s, i32 0
278  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
279  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
280  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
281  ret <4 x float> %res
282}
; Merge-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
283define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) {
284; CHECK-LABEL: test_masked_float_to_4_mask1:
285; CHECK:       # %bb.0:
286; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
287; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
288; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
289; CHECK-NEXT:    vmovaps %xmm1, %xmm0
290; CHECK-NEXT:    retq
291  %vec = insertelement <2 x float> undef, float %s, i32 0
292  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
293  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
294  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
295  ret <4 x float> %res
296}
297
; Zero-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
298define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) {
299; CHECK-LABEL: test_masked_z_float_to_4_mask1:
300; CHECK:       # %bb.0:
301; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
302; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
303; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
304; CHECK-NEXT:    retq
305  %vec = insertelement <2 x float> undef, float %s, i32 0
306  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
307  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
308  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
309  ret <4 x float> %res
310}
; Merge-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
311define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) {
312; CHECK-LABEL: test_masked_float_to_4_mask2:
313; CHECK:       # %bb.0:
314; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
315; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
316; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
317; CHECK-NEXT:    vmovaps %xmm1, %xmm0
318; CHECK-NEXT:    retq
319  %vec = insertelement <2 x float> undef, float %s, i32 0
320  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
321  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
322  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
323  ret <4 x float> %res
324}
325
; Zero-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
326define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) {
327; CHECK-LABEL: test_masked_z_float_to_4_mask2:
328; CHECK:       # %bb.0:
329; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
330; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
331; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
332; CHECK-NEXT:    retq
333  %vec = insertelement <2 x float> undef, float %s, i32 0
334  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
335  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
336  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
337  ret <4 x float> %res
338}
; Merge-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
339define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) {
340; CHECK-LABEL: test_masked_float_to_4_mask3:
341; CHECK:       # %bb.0:
342; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
343; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
344; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
345; CHECK-NEXT:    vmovaps %xmm1, %xmm0
346; CHECK-NEXT:    retq
347  %vec = insertelement <2 x float> undef, float %s, i32 0
348  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
349  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
350  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
351  ret <4 x float> %res
352}
353
; Zero-masked splat to <4 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
354define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) {
355; CHECK-LABEL: test_masked_z_float_to_4_mask3:
356; CHECK:       # %bb.0:
357; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
358; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
359; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
360; CHECK-NEXT:    retq
361  %vec = insertelement <2 x float> undef, float %s, i32 0
362  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
363  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
364  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
365  ret <4 x float> %res
366}
; Unmasked splat: %s is inserted into lane 0 of a 2-element vector and lane 0 is
; then shuffled into all 8 lanes of the <8 x float> result.
; Expected codegen is a single register-source vbroadcastss into %ymm0.
367define <8 x float> @test_float_to_8(float %s) {
368; CHECK-LABEL: test_float_to_8:
369; CHECK:       # %bb.0:
370; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
371; CHECK-NEXT:    retq
372  %vec = insertelement <2 x float> undef, float %s, i32 0
373  %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
374  ret <8 x float> %res
375}
; Merge-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; Expected codegen compares into %k1 and broadcasts with a {%k1} write mask.
376define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) {
377; CHECK-LABEL: test_masked_float_to_8_mask0:
378; CHECK:       # %bb.0:
379; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
380; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
381; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
382; CHECK-NEXT:    vmovaps %ymm1, %ymm0
383; CHECK-NEXT:    retq
384  %vec = insertelement <2 x float> undef, float %s, i32 0
385  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
386  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
387  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
388  ret <8 x float> %res
389}
390
; Zero-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; Expected codegen compares into %k1 and broadcasts with {%k1} {z}.
391define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) {
392; CHECK-LABEL: test_masked_z_float_to_8_mask0:
393; CHECK:       # %bb.0:
394; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
395; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
396; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
397; CHECK-NEXT:    retq
398  %vec = insertelement <2 x float> undef, float %s, i32 0
399  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
400  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
401  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
402  ret <8 x float> %res
403}
; Merge-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
404define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) {
405; CHECK-LABEL: test_masked_float_to_8_mask1:
406; CHECK:       # %bb.0:
407; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
408; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
409; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
410; CHECK-NEXT:    vmovaps %ymm1, %ymm0
411; CHECK-NEXT:    retq
412  %vec = insertelement <2 x float> undef, float %s, i32 0
413  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
414  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
415  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
416  ret <8 x float> %res
417}
418
; Zero-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
419define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) {
420; CHECK-LABEL: test_masked_z_float_to_8_mask1:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
423; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
424; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
425; CHECK-NEXT:    retq
426  %vec = insertelement <2 x float> undef, float %s, i32 0
427  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
428  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
429  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
430  ret <8 x float> %res
431}
; Merge-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
432define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) {
433; CHECK-LABEL: test_masked_float_to_8_mask2:
434; CHECK:       # %bb.0:
435; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
436; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
437; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
438; CHECK-NEXT:    vmovaps %ymm1, %ymm0
439; CHECK-NEXT:    retq
440  %vec = insertelement <2 x float> undef, float %s, i32 0
441  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
442  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
443  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
444  ret <8 x float> %res
445}
446
; Zero-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
447define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) {
448; CHECK-LABEL: test_masked_z_float_to_8_mask2:
449; CHECK:       # %bb.0:
450; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
451; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
452; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
453; CHECK-NEXT:    retq
454  %vec = insertelement <2 x float> undef, float %s, i32 0
455  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
456  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
457  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
458  ret <8 x float> %res
459}
; Merge-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
460define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) {
461; CHECK-LABEL: test_masked_float_to_8_mask3:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
464; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
465; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
466; CHECK-NEXT:    vmovaps %ymm1, %ymm0
467; CHECK-NEXT:    retq
468  %vec = insertelement <2 x float> undef, float %s, i32 0
469  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
470  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
471  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
472  ret <8 x float> %res
473}
474
; Zero-masked splat to <8 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
475define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) {
476; CHECK-LABEL: test_masked_z_float_to_8_mask3:
477; CHECK:       # %bb.0:
478; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
479; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
480; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
481; CHECK-NEXT:    retq
482  %vec = insertelement <2 x float> undef, float %s, i32 0
483  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
484  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
485  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
486  ret <8 x float> %res
487}
; Unmasked splat: %s is inserted into lane 0 of a 2-element vector and lane 0 is
; then shuffled into all 16 lanes of the <16 x float> result.
; Expected codegen is a single register-source vbroadcastss into %zmm0.
488define <16 x float> @test_float_to_16(float %s) {
489; CHECK-LABEL: test_float_to_16:
490; CHECK:       # %bb.0:
491; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
492; CHECK-NEXT:    retq
493  %vec = insertelement <2 x float> undef, float %s, i32 0
494  %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
495  ret <16 x float> %res
496}
; Merge-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; Expected codegen compares into %k1 and broadcasts with a {%k1} write mask.
497define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) {
498; CHECK-LABEL: test_masked_float_to_16_mask0:
499; CHECK:       # %bb.0:
500; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
501; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
502; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
503; CHECK-NEXT:    vmovaps %zmm1, %zmm0
504; CHECK-NEXT:    retq
505  %vec = insertelement <2 x float> undef, float %s, i32 0
506  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
507  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
508  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
509  ret <16 x float> %res
510}
511
; Zero-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; Expected codegen compares into %k1 and broadcasts with {%k1} {z}.
512define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) {
513; CHECK-LABEL: test_masked_z_float_to_16_mask0:
514; CHECK:       # %bb.0:
515; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
516; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
517; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
518; CHECK-NEXT:    retq
519  %vec = insertelement <2 x float> undef, float %s, i32 0
520  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
521  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
522  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
523  ret <16 x float> %res
524}
; Merge-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
525define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) {
526; CHECK-LABEL: test_masked_float_to_16_mask1:
527; CHECK:       # %bb.0:
528; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
529; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
530; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
531; CHECK-NEXT:    vmovaps %zmm1, %zmm0
532; CHECK-NEXT:    retq
533  %vec = insertelement <2 x float> undef, float %s, i32 0
534  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
535  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
536  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
537  ret <16 x float> %res
538}
539
; Zero-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
540define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) {
541; CHECK-LABEL: test_masked_z_float_to_16_mask1:
542; CHECK:       # %bb.0:
543; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
544; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
545; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
546; CHECK-NEXT:    retq
547  %vec = insertelement <2 x float> undef, float %s, i32 0
548  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
549  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
550  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
551  ret <16 x float> %res
552}
; Merge-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
553define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %default, <16 x float> %mask) {
554; CHECK-LABEL: test_masked_float_to_16_mask2:
555; CHECK:       # %bb.0:
556; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
557; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
558; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
559; CHECK-NEXT:    vmovaps %zmm1, %zmm0
560; CHECK-NEXT:    retq
561  %vec = insertelement <2 x float> undef, float %s, i32 0
562  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
563  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
564  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
565  ret <16 x float> %res
566}
567
; Zero-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
568define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) {
569; CHECK-LABEL: test_masked_z_float_to_16_mask2:
570; CHECK:       # %bb.0:
571; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
572; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
573; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
574; CHECK-NEXT:    retq
575  %vec = insertelement <2 x float> undef, float %s, i32 0
576  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
577  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
578  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
579  ret <16 x float> %res
580}
; Merge-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes keep %default.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
581define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) {
582; CHECK-LABEL: test_masked_float_to_16_mask3:
583; CHECK:       # %bb.0:
584; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
585; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
586; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
587; CHECK-NEXT:    vmovaps %zmm1, %zmm0
588; CHECK-NEXT:    retq
589  %vec = insertelement <2 x float> undef, float %s, i32 0
590  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
591  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
592  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
593  ret <16 x float> %res
594}
595
; Zero-masked splat to <16 x float>: lanes where %mask is ordered-equal to 0.0
; take the broadcast of %s, the remaining lanes are zero.
; NOTE(review): the IR body matches the _mask0 variant; only the function name
; differs. Confirm whether a distinct mask was intended.
596define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) {
597; CHECK-LABEL: test_masked_z_float_to_16_mask3:
598; CHECK:       # %bb.0:
599; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
600; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
601; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
602; CHECK-NEXT:    retq
603  %vec = insertelement <2 x float> undef, float %s, i32 0
604  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
605  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
606  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
607  ret <16 x float> %res
608}
; Unmasked splat from memory: the scalar is loaded from %p, inserted into lane 0
; of a 2-element vector and shuffled into all 4 lanes of the <4 x double> result.
; Expected codegen folds the load into a memory-source vbroadcastsd from (%rdi).
609define <4 x double> @test_double_to_4_mem(double* %p) {
610; CHECK-LABEL: test_double_to_4_mem:
611; CHECK:       # %bb.0:
612; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
613; CHECK-NEXT:    retq
614  %s = load double, double* %p
615  %vec = insertelement <2 x double> undef, double %s, i32 0
616  %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
617  ret <4 x double> %res
618}
; Merge-masked splat from memory to <4 x double>: the scalar is loaded from %p;
; lanes where %mask is ordered-equal to 0.0 take its broadcast, the remaining
; lanes keep %default.
; Expected codegen folds the load into vbroadcastsd (%rdi) with a {%k1} write
; mask, merging directly into %ymm0 with no trailing register move.
619define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> %default, <4 x double> %mask) {
620; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
621; CHECK:       # %bb.0:
622; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
623; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
624; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
625; CHECK-NEXT:    retq
626  %s = load double, double* %p
627  %vec = insertelement <2 x double> undef, double %s, i32 0
628  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
629  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
630  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
631  ret <4 x double> %res
632}
633
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect the compare to set %k1 and the
; load+splat+select to fold into one zero-masked ({z}) vbroadcastsd.
634define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double> %mask) {
635; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
636; CHECK:       # %bb.0:
637; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
638; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
639; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
640; CHECK-NEXT:    retq
641  %s = load double, double* %p
642  %vec = insertelement <2 x double> undef, double %s, i32 0
643  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
644  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
645  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
646  ret <4 x double> %res
647}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_double_to_4_mem_mask0.
648define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> %default, <4 x double> %mask) {
649; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
650; CHECK:       # %bb.0:
651; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
652; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
653; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
654; CHECK-NEXT:    retq
655  %s = load double, double* %p
656  %vec = insertelement <2 x double> undef, double %s, i32 0
657  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
658  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
659  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
660  ret <4 x double> %res
661}
662
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_double_to_4_mem_mask0.
663define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double> %mask) {
664; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
665; CHECK:       # %bb.0:
666; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
667; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
668; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
669; CHECK-NEXT:    retq
670  %s = load double, double* %p
671  %vec = insertelement <2 x double> undef, double %s, i32 0
672  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
673  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
674  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
675  ret <4 x double> %res
676}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_double_to_4_mem_mask0.
677define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> %default, <4 x double> %mask) {
678; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
679; CHECK:       # %bb.0:
680; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
681; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
682; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
683; CHECK-NEXT:    retq
684  %s = load double, double* %p
685  %vec = insertelement <2 x double> undef, double %s, i32 0
686  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
687  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
688  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
689  ret <4 x double> %res
690}
691
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_double_to_4_mem_mask0.
692define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double> %mask) {
693; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
694; CHECK:       # %bb.0:
695; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
696; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
697; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
698; CHECK-NEXT:    retq
699  %s = load double, double* %p
700  %vec = insertelement <2 x double> undef, double %s, i32 0
701  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
702  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
703  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
704  ret <4 x double> %res
705}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_double_to_4_mem_mask0.
706define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> %default, <4 x double> %mask) {
707; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
708; CHECK:       # %bb.0:
709; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
710; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
711; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
712; CHECK-NEXT:    retq
713  %s = load double, double* %p
714  %vec = insertelement <2 x double> undef, double %s, i32 0
715  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
716  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
717  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
718  ret <4 x double> %res
719}
720
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_double_to_4_mem_mask0.
721define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p, <4 x double> %mask) {
722; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
723; CHECK:       # %bb.0:
724; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
725; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
726; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
727; CHECK-NEXT:    retq
728  %s = load double, double* %p
729  %vec = insertelement <2 x double> undef, double %s, i32 0
730  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
731  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
732  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
733  ret <4 x double> %res
734}
; Unmasked broadcast from memory: one double is loaded from %p, inserted into
; lane 0 of a 2-element vector and splatted to 8 lanes with an all-zero shuffle
; mask. The CHECK lines expect the load to fold into a single vbroadcastsd
; writing %zmm0.
735define <8 x double> @test_double_to_8_mem(double* %p) {
736; CHECK-LABEL: test_double_to_8_mem:
737; CHECK:       # %bb.0:
738; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
739; CHECK-NEXT:    retq
740  %s = load double, double* %p
741  %vec = insertelement <2 x double> undef, double %s, i32 0
742  %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
743  ret <8 x double> %res
744}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect the compare to set %k1
; and the load+splat+select to fold into one merge-masked vbroadcastsd.
745define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> %default, <8 x double> %mask) {
746; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
747; CHECK:       # %bb.0:
748; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
749; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
750; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
751; CHECK-NEXT:    retq
752  %s = load double, double* %p
753  %vec = insertelement <2 x double> undef, double %s, i32 0
754  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
755  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
756  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
757  ret <8 x double> %res
758}
759
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect the compare to set %k1 and the
; load+splat+select to fold into one zero-masked ({z}) vbroadcastsd.
760define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double> %mask) {
761; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
762; CHECK:       # %bb.0:
763; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
764; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
765; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
766; CHECK-NEXT:    retq
767  %s = load double, double* %p
768  %vec = insertelement <2 x double> undef, double %s, i32 0
769  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
770  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
771  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
772  ret <8 x double> %res
773}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_double_to_8_mem_mask0.
774define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> %default, <8 x double> %mask) {
775; CHECK-LABEL: test_masked_double_to_8_mem_mask1:
776; CHECK:       # %bb.0:
777; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
778; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
779; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
780; CHECK-NEXT:    retq
781  %s = load double, double* %p
782  %vec = insertelement <2 x double> undef, double %s, i32 0
783  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
784  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
785  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
786  ret <8 x double> %res
787}
788
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_double_to_8_mem_mask0.
789define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double> %mask) {
790; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1:
791; CHECK:       # %bb.0:
792; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
793; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
794; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
795; CHECK-NEXT:    retq
796  %s = load double, double* %p
797  %vec = insertelement <2 x double> undef, double %s, i32 0
798  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
799  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
800  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
801  ret <8 x double> %res
802}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_double_to_8_mem_mask0.
803define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> %default, <8 x double> %mask) {
804; CHECK-LABEL: test_masked_double_to_8_mem_mask2:
805; CHECK:       # %bb.0:
806; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
807; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
808; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
809; CHECK-NEXT:    retq
810  %s = load double, double* %p
811  %vec = insertelement <2 x double> undef, double %s, i32 0
812  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
813  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
814  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
815  ret <8 x double> %res
816}
817
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_double_to_8_mem_mask0.
818define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double> %mask) {
819; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2:
820; CHECK:       # %bb.0:
821; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
822; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
823; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
824; CHECK-NEXT:    retq
825  %s = load double, double* %p
826  %vec = insertelement <2 x double> undef, double %s, i32 0
827  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
828  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
829  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
830  ret <8 x double> %res
831}
; Merge-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_double_to_8_mem_mask0.
832define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> %default, <8 x double> %mask) {
833; CHECK-LABEL: test_masked_double_to_8_mem_mask3:
834; CHECK:       # %bb.0:
835; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
836; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
837; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
838; CHECK-NEXT:    retq
839  %s = load double, double* %p
840  %vec = insertelement <2 x double> undef, double %s, i32 0
841  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
842  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
843  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
844  ret <8 x double> %res
845}
846
; Zero-masked broadcast from memory: the double loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastsd with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_double_to_8_mem_mask0.
847define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p, <8 x double> %mask) {
848; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3:
849; CHECK:       # %bb.0:
850; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
851; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
852; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
853; CHECK-NEXT:    retq
854  %s = load double, double* %p
855  %vec = insertelement <2 x double> undef, double %s, i32 0
856  %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
857  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
858  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
859  ret <8 x double> %res
860}
; Unmasked broadcast from memory: one float is loaded from %p, inserted into
; lane 0 of a 2-element vector and splatted to 4 lanes with an all-zero shuffle
; mask. The CHECK lines expect the load to fold into a single vbroadcastss
; writing %xmm0.
861define <4 x float> @test_float_to_4_mem(float* %p) {
862; CHECK-LABEL: test_float_to_4_mem:
863; CHECK:       # %bb.0:
864; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
865; CHECK-NEXT:    retq
866  %s = load float, float* %p
867  %vec = insertelement <2 x float> undef, float %s, i32 0
868  %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
869  ret <4 x float> %res
870}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect the compare to set %k1
; and the load+splat+select to fold into one merge-masked vbroadcastss.
871define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %default, <4 x float> %mask) {
872; CHECK-LABEL: test_masked_float_to_4_mem_mask0:
873; CHECK:       # %bb.0:
874; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
875; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
876; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
877; CHECK-NEXT:    retq
878  %s = load float, float* %p
879  %vec = insertelement <2 x float> undef, float %s, i32 0
880  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
881  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
882  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
883  ret <4 x float> %res
884}
885
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect the compare to set %k1 and the
; load+splat+select to fold into one zero-masked ({z}) vbroadcastss.
886define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %mask) {
887; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0:
888; CHECK:       # %bb.0:
889; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
890; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
891; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
892; CHECK-NEXT:    retq
893  %s = load float, float* %p
894  %vec = insertelement <2 x float> undef, float %s, i32 0
895  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
896  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
897  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
898  ret <4 x float> %res
899}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_4_mem_mask0.
900define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %default, <4 x float> %mask) {
901; CHECK-LABEL: test_masked_float_to_4_mem_mask1:
902; CHECK:       # %bb.0:
903; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
904; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
905; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
906; CHECK-NEXT:    retq
907  %s = load float, float* %p
908  %vec = insertelement <2 x float> undef, float %s, i32 0
909  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
910  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
911  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
912  ret <4 x float> %res
913}
914
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_4_mem_mask0.
915define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %mask) {
916; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1:
917; CHECK:       # %bb.0:
918; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
919; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
920; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
921; CHECK-NEXT:    retq
922  %s = load float, float* %p
923  %vec = insertelement <2 x float> undef, float %s, i32 0
924  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
925  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
926  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
927  ret <4 x float> %res
928}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_4_mem_mask0.
929define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %default, <4 x float> %mask) {
930; CHECK-LABEL: test_masked_float_to_4_mem_mask2:
931; CHECK:       # %bb.0:
932; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
933; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
934; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
935; CHECK-NEXT:    retq
936  %s = load float, float* %p
937  %vec = insertelement <2 x float> undef, float %s, i32 0
938  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
939  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
940  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
941  ret <4 x float> %res
942}
943
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_4_mem_mask0.
944define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %mask) {
945; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2:
946; CHECK:       # %bb.0:
947; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
948; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
949; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
950; CHECK-NEXT:    retq
951  %s = load float, float* %p
952  %vec = insertelement <2 x float> undef, float %s, i32 0
953  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
954  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
955  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
956  ret <4 x float> %res
957}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_4_mem_mask0.
958define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %default, <4 x float> %mask) {
959; CHECK-LABEL: test_masked_float_to_4_mem_mask3:
960; CHECK:       # %bb.0:
961; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
962; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
963; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
964; CHECK-NEXT:    retq
965  %s = load float, float* %p
966  %vec = insertelement <2 x float> undef, float %s, i32 0
967  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
968  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
969  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
970  ret <4 x float> %res
971}
972
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 4 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_4_mem_mask0.
973define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p, <4 x float> %mask) {
974; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3:
975; CHECK:       # %bb.0:
976; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
977; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
978; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
979; CHECK-NEXT:    retq
980  %s = load float, float* %p
981  %vec = insertelement <2 x float> undef, float %s, i32 0
982  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
983  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
984  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
985  ret <4 x float> %res
986}
; Unmasked broadcast from memory: one float is loaded from %p, inserted into
; lane 0 of a 2-element vector and splatted to 8 lanes with an all-zero shuffle
; mask. The CHECK lines expect the load to fold into a single vbroadcastss
; writing %ymm0.
987define <8 x float> @test_float_to_8_mem(float* %p) {
988; CHECK-LABEL: test_float_to_8_mem:
989; CHECK:       # %bb.0:
990; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
991; CHECK-NEXT:    retq
992  %s = load float, float* %p
993  %vec = insertelement <2 x float> undef, float %s, i32 0
994  %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
995  ret <8 x float> %res
996}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect the compare to set %k1
; and the load+splat+select to fold into one merge-masked vbroadcastss.
997define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %default, <8 x float> %mask) {
998; CHECK-LABEL: test_masked_float_to_8_mem_mask0:
999; CHECK:       # %bb.0:
1000; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1001; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1002; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
1003; CHECK-NEXT:    retq
1004  %s = load float, float* %p
1005  %vec = insertelement <2 x float> undef, float %s, i32 0
1006  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1007  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1008  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1009  ret <8 x float> %res
1010}
1011
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect the compare to set %k1 and the
; load+splat+select to fold into one zero-masked ({z}) vbroadcastss.
1012define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %mask) {
1013; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0:
1014; CHECK:       # %bb.0:
1015; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1016; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
1017; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
1018; CHECK-NEXT:    retq
1019  %s = load float, float* %p
1020  %vec = insertelement <2 x float> undef, float %s, i32 0
1021  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1022  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1023  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1024  ret <8 x float> %res
1025}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_8_mem_mask0.
1026define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %default, <8 x float> %mask) {
1027; CHECK-LABEL: test_masked_float_to_8_mem_mask1:
1028; CHECK:       # %bb.0:
1029; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1030; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1031; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
1032; CHECK-NEXT:    retq
1033  %s = load float, float* %p
1034  %vec = insertelement <2 x float> undef, float %s, i32 0
1035  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1036  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1037  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1038  ret <8 x float> %res
1039}
1040
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_8_mem_mask0.
1041define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x float> %mask) {
1042; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1:
1043; CHECK:       # %bb.0:
1044; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1045; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
1046; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
1047; CHECK-NEXT:    retq
1048  %s = load float, float* %p
1049  %vec = insertelement <2 x float> undef, float %s, i32 0
1050  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1051  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1052  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1053  ret <8 x float> %res
1054}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_8_mem_mask0.
1055define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %default, <8 x float> %mask) {
1056; CHECK-LABEL: test_masked_float_to_8_mem_mask2:
1057; CHECK:       # %bb.0:
1058; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1059; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1060; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
1061; CHECK-NEXT:    retq
1062  %s = load float, float* %p
1063  %vec = insertelement <2 x float> undef, float %s, i32 0
1064  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1065  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1066  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1067  ret <8 x float> %res
1068}
1069
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_8_mem_mask0.
1070define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %mask) {
1071; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2:
1072; CHECK:       # %bb.0:
1073; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1074; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
1075; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
1076; CHECK-NEXT:    retq
1077  %s = load float, float* %p
1078  %vec = insertelement <2 x float> undef, float %s, i32 0
1079  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1080  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1081  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1082  ret <8 x float> %res
1083}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, the
; remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_8_mem_mask0.
1084define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %default, <8 x float> %mask) {
1085; CHECK-LABEL: test_masked_float_to_8_mem_mask3:
1086; CHECK:       # %bb.0:
1087; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1088; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1089; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
1090; CHECK-NEXT:    retq
1091  %s = load float, float* %p
1092  %vec = insertelement <2 x float> undef, float %s, i32 0
1093  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1094  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1095  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1096  ret <8 x float> %res
1097}
1098
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 8 lanes; lanes where %mask compares ordered-equal to zero take the splat, all
; other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_8_mem_mask0.
1099define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p, <8 x float> %mask) {
1100; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3:
1101; CHECK:       # %bb.0:
1102; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1103; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
1104; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
1105; CHECK-NEXT:    retq
1106  %s = load float, float* %p
1107  %vec = insertelement <2 x float> undef, float %s, i32 0
1108  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1109  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1110  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1111  ret <8 x float> %res
1112}
; Unmasked broadcast from memory: one float is loaded from %p, inserted into
; lane 0 of a 2-element vector and splatted to 16 lanes with an all-zero
; shuffle mask. The CHECK lines expect the load to fold into a single
; vbroadcastss writing %zmm0.
1113define <16 x float> @test_float_to_16_mem(float* %p) {
1114; CHECK-LABEL: test_float_to_16_mem:
1115; CHECK:       # %bb.0:
1116; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0
1117; CHECK-NEXT:    retq
1118  %s = load float, float* %p
1119  %vec = insertelement <2 x float> undef, float %s, i32 0
1120  %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1121  ret <16 x float> %res
1122}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; the remaining lanes keep %default. The CHECK lines expect the compare to set
; %k1 and the load+splat+select to fold into one merge-masked vbroadcastss.
1123define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %default, <16 x float> %mask) {
1124; CHECK-LABEL: test_masked_float_to_16_mem_mask0:
1125; CHECK:       # %bb.0:
1126; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1127; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1128; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
1129; CHECK-NEXT:    retq
1130  %s = load float, float* %p
1131  %vec = insertelement <2 x float> undef, float %s, i32 0
1132  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1133  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1134  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1135  ret <16 x float> %res
1136}
1137
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; all other lanes are zeroed. The CHECK lines expect the compare to set %k1 and
; the load+splat+select to fold into one zero-masked ({z}) vbroadcastss.
1138define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> %mask) {
1139; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0:
1140; CHECK:       # %bb.0:
1141; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1142; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1143; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
1144; CHECK-NEXT:    retq
1145  %s = load float, float* %p
1146  %vec = insertelement <2 x float> undef, float %s, i32 0
1147  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1148  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1149  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1150  ret <16 x float> %res
1151}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; the remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_16_mem_mask0.
1152define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %default, <16 x float> %mask) {
1153; CHECK-LABEL: test_masked_float_to_16_mem_mask1:
1154; CHECK:       # %bb.0:
1155; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1156; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1157; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
1158; CHECK-NEXT:    retq
1159  %s = load float, float* %p
1160  %vec = insertelement <2 x float> undef, float %s, i32 0
1161  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1162  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1163  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1164  ret <16 x float> %res
1165}
1166
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; all other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_16_mem_mask0.
1167define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> %mask) {
1168; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1:
1169; CHECK:       # %bb.0:
1170; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1171; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1172; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
1173; CHECK-NEXT:    retq
1174  %s = load float, float* %p
1175  %vec = insertelement <2 x float> undef, float %s, i32 0
1176  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1177  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1178  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1179  ret <16 x float> %res
1180}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; the remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_16_mem_mask0.
1181define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %default, <16 x float> %mask) {
1182; CHECK-LABEL: test_masked_float_to_16_mem_mask2:
1183; CHECK:       # %bb.0:
1184; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1185; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1186; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
1187; CHECK-NEXT:    retq
1188  %s = load float, float* %p
1189  %vec = insertelement <2 x float> undef, float %s, i32 0
1190  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1191  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1192  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1193  ret <16 x float> %res
1194}
1195
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; all other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_16_mem_mask0.
1196define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> %mask) {
1197; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2:
1198; CHECK:       # %bb.0:
1199; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1200; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1201; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
1202; CHECK-NEXT:    retq
1203  %s = load float, float* %p
1204  %vec = insertelement <2 x float> undef, float %s, i32 0
1205  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1206  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1207  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1208  ret <16 x float> %res
1209}
; Merge-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; the remaining lanes keep %default. The CHECK lines expect one merge-masked
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_float_to_16_mem_mask0.
1210define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %default, <16 x float> %mask) {
1211; CHECK-LABEL: test_masked_float_to_16_mem_mask3:
1212; CHECK:       # %bb.0:
1213; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1214; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
1215; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
1216; CHECK-NEXT:    retq
1217  %s = load float, float* %p
1218  %vec = insertelement <2 x float> undef, float %s, i32 0
1219  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1220  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1221  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1222  ret <16 x float> %res
1223}
1224
; Zero-masked broadcast from memory: the float loaded from %p is splatted to
; 16 lanes; lanes where %mask compares ordered-equal to zero take the splat,
; all other lanes are zeroed. The CHECK lines expect one zero-masked ({z})
; vbroadcastss with a folded load.
; NOTE(review): apart from the function name this is identical to
; test_masked_z_float_to_16_mem_mask0.
1225define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p, <16 x float> %mask) {
1226; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3:
1227; CHECK:       # %bb.0:
1228; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1229; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
1230; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
1231; CHECK-NEXT:    retq
1232  %s = load float, float* %p
1233  %vec = insertelement <2 x float> undef, float %s, i32 0
1234  %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1235  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1236  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1237  ret <16 x float> %res
1238}
1239