1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
3
; Unmasked splat of the scalar %s to <16 x i8>: %s is inserted into lane 0 of
; an otherwise undef <2 x i8> vector, and an all-zero shuffle mask replicates
; that lane across every result lane. The FileCheck assertions expect a single
; vpbroadcastb from %edi into %xmm0.
4define <16 x i8> @test_i8_to_16(i8 %s) {
5; CHECK-LABEL: test_i8_to_16:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vpbroadcastb %edi, %xmm0
8; CHECK-NEXT:    retq
9  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
10  %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
11  ret <16 x i8> %res
12}
; Merge-masked splat of %s to <16 x i8>: result lanes whose %mask element
; equals zero take the splat value; all other lanes keep the matching lane of
; %default. The FileCheck assertions expect vptestnmb on %xmm1 to produce %k1,
; then a vpbroadcastb from %edi into %xmm0 predicated on %k1 (no {z}).
13define <16 x i8> @test_masked_i8_to_16_mask0(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
14; CHECK-LABEL: test_masked_i8_to_16_mask0:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
17; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
18; CHECK-NEXT:    retq
19  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
20  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
21  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
22  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
23  ret <16 x i8> %res
24}
25
; Zero-masked splat of %s to <16 x i8>: result lanes whose %mask element
; equals zero take the splat value; all other lanes are zero, because the
; select's false operand is zeroinitializer. The FileCheck assertions expect
; vptestnmb on %xmm0 to produce %k1, then vpbroadcastb with {%k1} {z}.
26define <16 x i8> @test_masked_z_i8_to_16_mask0(i8 %s, <16 x i8> %mask) {
27; CHECK-LABEL: test_masked_z_i8_to_16_mask0:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
30; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
31; CHECK-NEXT:    retq
32  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
33  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
34  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
35  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
36  ret <16 x i8> %res
37}
; Merge-masked splat of %s to <16 x i8>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i8_to_16_mask0; only the function name differs. Expected code:
; vptestnmb on %xmm1 into %k1, then vpbroadcastb %edi into %xmm0 under %k1.
38define <16 x i8> @test_masked_i8_to_16_mask1(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
39; CHECK-LABEL: test_masked_i8_to_16_mask1:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
42; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
43; CHECK-NEXT:    retq
44  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
45  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
46  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
47  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
48  ret <16 x i8> %res
49}
50
; Zero-masked splat of %s to <16 x i8>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i8_to_16_mask0; only the function name differs. Expected code:
; vptestnmb on %xmm0 into %k1, then vpbroadcastb with {%k1} {z}.
51define <16 x i8> @test_masked_z_i8_to_16_mask1(i8 %s, <16 x i8> %mask) {
52; CHECK-LABEL: test_masked_z_i8_to_16_mask1:
53; CHECK:       # %bb.0:
54; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
55; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
56; CHECK-NEXT:    retq
57  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
58  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
59  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
60  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
61  ret <16 x i8> %res
62}
; Merge-masked splat of %s to <16 x i8>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i8_to_16_mask0; only the function name differs. Expected code:
; vptestnmb on %xmm1 into %k1, then vpbroadcastb %edi into %xmm0 under %k1.
63define <16 x i8> @test_masked_i8_to_16_mask2(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
64; CHECK-LABEL: test_masked_i8_to_16_mask2:
65; CHECK:       # %bb.0:
66; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
67; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
68; CHECK-NEXT:    retq
69  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
70  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
71  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
72  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
73  ret <16 x i8> %res
74}
75
; Zero-masked splat of %s to <16 x i8>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i8_to_16_mask0; only the function name differs. Expected code:
; vptestnmb on %xmm0 into %k1, then vpbroadcastb with {%k1} {z}.
76define <16 x i8> @test_masked_z_i8_to_16_mask2(i8 %s, <16 x i8> %mask) {
77; CHECK-LABEL: test_masked_z_i8_to_16_mask2:
78; CHECK:       # %bb.0:
79; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
80; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
81; CHECK-NEXT:    retq
82  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
83  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
84  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
85  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
86  ret <16 x i8> %res
87}
; Merge-masked splat of %s to <16 x i8>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i8_to_16_mask0; only the function name differs. Expected code:
; vptestnmb on %xmm1 into %k1, then vpbroadcastb %edi into %xmm0 under %k1.
88define <16 x i8> @test_masked_i8_to_16_mask3(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
89; CHECK-LABEL: test_masked_i8_to_16_mask3:
90; CHECK:       # %bb.0:
91; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
92; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
93; CHECK-NEXT:    retq
94  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
95  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
96  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
97  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
98  ret <16 x i8> %res
99}
100
; Zero-masked splat of %s to <16 x i8>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i8_to_16_mask0; only the function name differs. Expected code:
; vptestnmb on %xmm0 into %k1, then vpbroadcastb with {%k1} {z}.
101define <16 x i8> @test_masked_z_i8_to_16_mask3(i8 %s, <16 x i8> %mask) {
102; CHECK-LABEL: test_masked_z_i8_to_16_mask3:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
105; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
106; CHECK-NEXT:    retq
107  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
108  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
109  %cmp = icmp eq <16 x i8> %mask, zeroinitializer
110  %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
111  ret <16 x i8> %res
112}
; Unmasked splat of the scalar %s to <32 x i8>: %s is inserted into lane 0 of
; an otherwise undef <2 x i8> vector, and an all-zero shuffle mask replicates
; that lane across every result lane. The FileCheck assertions expect a single
; vpbroadcastb from %edi into %ymm0.
113define <32 x i8> @test_i8_to_32(i8 %s) {
114; CHECK-LABEL: test_i8_to_32:
115; CHECK:       # %bb.0:
116; CHECK-NEXT:    vpbroadcastb %edi, %ymm0
117; CHECK-NEXT:    retq
118  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
119  %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
120  ret <32 x i8> %res
121}
; Merge-masked splat of %s to <32 x i8>: result lanes whose %mask element
; equals zero take the splat value; all other lanes keep the matching lane of
; %default. The FileCheck assertions expect vptestnmb on %ymm1 to produce %k1,
; then a vpbroadcastb from %edi into %ymm0 predicated on %k1 (no {z}).
122define <32 x i8> @test_masked_i8_to_32_mask0(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
123; CHECK-LABEL: test_masked_i8_to_32_mask0:
124; CHECK:       # %bb.0:
125; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
126; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
127; CHECK-NEXT:    retq
128  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
129  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
130  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
131  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
132  ret <32 x i8> %res
133}
134
; Zero-masked splat of %s to <32 x i8>: result lanes whose %mask element
; equals zero take the splat value; all other lanes are zero, because the
; select's false operand is zeroinitializer. The FileCheck assertions expect
; vptestnmb on %ymm0 to produce %k1, then vpbroadcastb with {%k1} {z}.
135define <32 x i8> @test_masked_z_i8_to_32_mask0(i8 %s, <32 x i8> %mask) {
136; CHECK-LABEL: test_masked_z_i8_to_32_mask0:
137; CHECK:       # %bb.0:
138; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
139; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
140; CHECK-NEXT:    retq
141  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
142  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
143  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
144  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
145  ret <32 x i8> %res
146}
; Merge-masked splat of %s to <32 x i8>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i8_to_32_mask0; only the function name differs. Expected code:
; vptestnmb on %ymm1 into %k1, then vpbroadcastb %edi into %ymm0 under %k1.
147define <32 x i8> @test_masked_i8_to_32_mask1(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
148; CHECK-LABEL: test_masked_i8_to_32_mask1:
149; CHECK:       # %bb.0:
150; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
151; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
152; CHECK-NEXT:    retq
153  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
154  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
155  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
156  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
157  ret <32 x i8> %res
158}
159
; Zero-masked splat of %s to <32 x i8>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i8_to_32_mask0; only the function name differs. Expected code:
; vptestnmb on %ymm0 into %k1, then vpbroadcastb with {%k1} {z}.
160define <32 x i8> @test_masked_z_i8_to_32_mask1(i8 %s, <32 x i8> %mask) {
161; CHECK-LABEL: test_masked_z_i8_to_32_mask1:
162; CHECK:       # %bb.0:
163; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
164; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
165; CHECK-NEXT:    retq
166  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
167  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
168  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
169  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
170  ret <32 x i8> %res
171}
; Merge-masked splat of %s to <32 x i8>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i8_to_32_mask0; only the function name differs. Expected code:
; vptestnmb on %ymm1 into %k1, then vpbroadcastb %edi into %ymm0 under %k1.
172define <32 x i8> @test_masked_i8_to_32_mask2(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
173; CHECK-LABEL: test_masked_i8_to_32_mask2:
174; CHECK:       # %bb.0:
175; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
176; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
177; CHECK-NEXT:    retq
178  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
179  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
180  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
181  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
182  ret <32 x i8> %res
183}
184
; Zero-masked splat of %s to <32 x i8>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i8_to_32_mask0; only the function name differs. Expected code:
; vptestnmb on %ymm0 into %k1, then vpbroadcastb with {%k1} {z}.
185define <32 x i8> @test_masked_z_i8_to_32_mask2(i8 %s, <32 x i8> %mask) {
186; CHECK-LABEL: test_masked_z_i8_to_32_mask2:
187; CHECK:       # %bb.0:
188; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
189; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
190; CHECK-NEXT:    retq
191  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
192  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
193  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
194  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
195  ret <32 x i8> %res
196}
; Merge-masked splat of %s to <32 x i8>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i8_to_32_mask0; only the function name differs. Expected code:
; vptestnmb on %ymm1 into %k1, then vpbroadcastb %edi into %ymm0 under %k1.
197define <32 x i8> @test_masked_i8_to_32_mask3(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
198; CHECK-LABEL: test_masked_i8_to_32_mask3:
199; CHECK:       # %bb.0:
200; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
201; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
202; CHECK-NEXT:    retq
203  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
204  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
205  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
206  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
207  ret <32 x i8> %res
208}
209
; Zero-masked splat of %s to <32 x i8>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i8_to_32_mask0; only the function name differs. Expected code:
; vptestnmb on %ymm0 into %k1, then vpbroadcastb with {%k1} {z}.
210define <32 x i8> @test_masked_z_i8_to_32_mask3(i8 %s, <32 x i8> %mask) {
211; CHECK-LABEL: test_masked_z_i8_to_32_mask3:
212; CHECK:       # %bb.0:
213; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
214; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
215; CHECK-NEXT:    retq
216  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
217  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
218  %cmp = icmp eq <32 x i8> %mask, zeroinitializer
219  %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
220  ret <32 x i8> %res
221}
222define <64 x i8> @test_i8_to_64(i8 %s) {
223; CHECK-LABEL: test_i8_to_64:
224; CHECK:       # %bb.0:
225; CHECK-NEXT:    vpbroadcastb %edi, %zmm0
226; CHECK-NEXT:    retq
227  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
228  %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
229  ret <64 x i8> %res
230}
231define <64 x i8> @test_masked_i8_to_64_mask0(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
232; CHECK-LABEL: test_masked_i8_to_64_mask0:
233; CHECK:       # %bb.0:
234; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
235; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
236; CHECK-NEXT:    retq
237  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
238  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
239  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
240  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
241  ret <64 x i8> %res
242}
243
244define <64 x i8> @test_masked_z_i8_to_64_mask0(i8 %s, <64 x i8> %mask) {
245; CHECK-LABEL: test_masked_z_i8_to_64_mask0:
246; CHECK:       # %bb.0:
247; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
248; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
249; CHECK-NEXT:    retq
250  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
251  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
252  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
253  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
254  ret <64 x i8> %res
255}
256define <64 x i8> @test_masked_i8_to_64_mask1(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
257; CHECK-LABEL: test_masked_i8_to_64_mask1:
258; CHECK:       # %bb.0:
259; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
260; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
261; CHECK-NEXT:    retq
262  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
263  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
264  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
265  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
266  ret <64 x i8> %res
267}
268
269define <64 x i8> @test_masked_z_i8_to_64_mask1(i8 %s, <64 x i8> %mask) {
270; CHECK-LABEL: test_masked_z_i8_to_64_mask1:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
273; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
274; CHECK-NEXT:    retq
275  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
276  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
277  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
278  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
279  ret <64 x i8> %res
280}
281define <64 x i8> @test_masked_i8_to_64_mask2(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
282; CHECK-LABEL: test_masked_i8_to_64_mask2:
283; CHECK:       # %bb.0:
284; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
285; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
286; CHECK-NEXT:    retq
287  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
288  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
289  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
290  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
291  ret <64 x i8> %res
292}
293
294define <64 x i8> @test_masked_z_i8_to_64_mask2(i8 %s, <64 x i8> %mask) {
295; CHECK-LABEL: test_masked_z_i8_to_64_mask2:
296; CHECK:       # %bb.0:
297; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
298; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
299; CHECK-NEXT:    retq
300  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
301  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
302  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
303  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
304  ret <64 x i8> %res
305}
306define <64 x i8> @test_masked_i8_to_64_mask3(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
307; CHECK-LABEL: test_masked_i8_to_64_mask3:
308; CHECK:       # %bb.0:
309; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
310; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
311; CHECK-NEXT:    retq
312  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
313  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
314  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
315  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
316  ret <64 x i8> %res
317}
318
319define <64 x i8> @test_masked_z_i8_to_64_mask3(i8 %s, <64 x i8> %mask) {
320; CHECK-LABEL: test_masked_z_i8_to_64_mask3:
321; CHECK:       # %bb.0:
322; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
323; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
324; CHECK-NEXT:    retq
325  %vec = insertelement <2 x i8> undef, i8 %s, i32 0
326  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
327  %cmp = icmp eq <64 x i8> %mask, zeroinitializer
328  %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
329  ret <64 x i8> %res
330}
; Unmasked splat of the scalar %s to <8 x i16>: %s is inserted into lane 0 of
; an otherwise undef <2 x i16> vector, and an all-zero shuffle mask replicates
; that lane across every result lane. The FileCheck assertions expect a single
; vpbroadcastw from %edi into %xmm0.
331define <8 x i16> @test_i16_to_8(i16 %s) {
332; CHECK-LABEL: test_i16_to_8:
333; CHECK:       # %bb.0:
334; CHECK-NEXT:    vpbroadcastw %edi, %xmm0
335; CHECK-NEXT:    retq
336  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
337  %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
338  ret <8 x i16> %res
339}
; Merge-masked splat of %s to <8 x i16>: result lanes whose %mask element
; equals zero take the splat value; all other lanes keep the matching lane of
; %default. The FileCheck assertions expect vptestnmw on %xmm1 to produce %k1,
; then a vpbroadcastw from %edi into %xmm0 predicated on %k1 (no {z}).
340define <8 x i16> @test_masked_i16_to_8_mask0(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
341; CHECK-LABEL: test_masked_i16_to_8_mask0:
342; CHECK:       # %bb.0:
343; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
344; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
345; CHECK-NEXT:    retq
346  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
347  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
348  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
349  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
350  ret <8 x i16> %res
351}
352
; Zero-masked splat of %s to <8 x i16>: result lanes whose %mask element
; equals zero take the splat value; all other lanes are zero, because the
; select's false operand is zeroinitializer. The FileCheck assertions expect
; vptestnmw on %xmm0 to produce %k1, then vpbroadcastw with {%k1} {z}.
353define <8 x i16> @test_masked_z_i16_to_8_mask0(i16 %s, <8 x i16> %mask) {
354; CHECK-LABEL: test_masked_z_i16_to_8_mask0:
355; CHECK:       # %bb.0:
356; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
357; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
358; CHECK-NEXT:    retq
359  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
360  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
361  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
362  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
363  ret <8 x i16> %res
364}
; Merge-masked splat of %s to <8 x i16>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i16_to_8_mask0; only the function name differs. Expected code:
; vptestnmw on %xmm1 into %k1, then vpbroadcastw %edi into %xmm0 under %k1.
365define <8 x i16> @test_masked_i16_to_8_mask1(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
366; CHECK-LABEL: test_masked_i16_to_8_mask1:
367; CHECK:       # %bb.0:
368; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
369; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
370; CHECK-NEXT:    retq
371  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
372  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
373  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
374  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
375  ret <8 x i16> %res
376}
377
; Zero-masked splat of %s to <8 x i16>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i16_to_8_mask0; only the function name differs. Expected code:
; vptestnmw on %xmm0 into %k1, then vpbroadcastw with {%k1} {z}.
378define <8 x i16> @test_masked_z_i16_to_8_mask1(i16 %s, <8 x i16> %mask) {
379; CHECK-LABEL: test_masked_z_i16_to_8_mask1:
380; CHECK:       # %bb.0:
381; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
382; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
383; CHECK-NEXT:    retq
384  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
385  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
386  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
387  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
388  ret <8 x i16> %res
389}
; Merge-masked splat of %s to <8 x i16>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i16_to_8_mask0; only the function name differs. Expected code:
; vptestnmw on %xmm1 into %k1, then vpbroadcastw %edi into %xmm0 under %k1.
390define <8 x i16> @test_masked_i16_to_8_mask2(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
391; CHECK-LABEL: test_masked_i16_to_8_mask2:
392; CHECK:       # %bb.0:
393; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
394; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
395; CHECK-NEXT:    retq
396  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
397  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
398  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
399  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
400  ret <8 x i16> %res
401}
402
; Zero-masked splat of %s to <8 x i16>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i16_to_8_mask0; only the function name differs. Expected code:
; vptestnmw on %xmm0 into %k1, then vpbroadcastw with {%k1} {z}.
403define <8 x i16> @test_masked_z_i16_to_8_mask2(i16 %s, <8 x i16> %mask) {
404; CHECK-LABEL: test_masked_z_i16_to_8_mask2:
405; CHECK:       # %bb.0:
406; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
407; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
408; CHECK-NEXT:    retq
409  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
410  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
411  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
412  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
413  ret <8 x i16> %res
414}
; Merge-masked splat of %s to <8 x i16>: lanes where %mask == 0 take the splat
; value, the rest keep %default. The IR body is identical to
; test_masked_i16_to_8_mask0; only the function name differs. Expected code:
; vptestnmw on %xmm1 into %k1, then vpbroadcastw %edi into %xmm0 under %k1.
415define <8 x i16> @test_masked_i16_to_8_mask3(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
416; CHECK-LABEL: test_masked_i16_to_8_mask3:
417; CHECK:       # %bb.0:
418; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
419; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
420; CHECK-NEXT:    retq
421  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
422  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
423  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
424  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
425  ret <8 x i16> %res
426}
427
; Zero-masked splat of %s to <8 x i16>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i16_to_8_mask0; only the function name differs. Expected code:
; vptestnmw on %xmm0 into %k1, then vpbroadcastw with {%k1} {z}.
428define <8 x i16> @test_masked_z_i16_to_8_mask3(i16 %s, <8 x i16> %mask) {
429; CHECK-LABEL: test_masked_z_i16_to_8_mask3:
430; CHECK:       # %bb.0:
431; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
432; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
433; CHECK-NEXT:    retq
434  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
435  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
436  %cmp = icmp eq <8 x i16> %mask, zeroinitializer
437  %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
438  ret <8 x i16> %res
439}
; Unmasked splat of the scalar %s to <16 x i16>: %s is inserted into lane 0 of
; an otherwise undef <2 x i16> vector, and an all-zero shuffle mask replicates
; that lane across every result lane. The FileCheck assertions expect a single
; vpbroadcastw from %edi into %ymm0.
440define <16 x i16> @test_i16_to_16(i16 %s) {
441; CHECK-LABEL: test_i16_to_16:
442; CHECK:       # %bb.0:
443; CHECK-NEXT:    vpbroadcastw %edi, %ymm0
444; CHECK-NEXT:    retq
445  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
446  %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
447  ret <16 x i16> %res
448}
; Merge-masked splat of %s to <16 x i16>: result lanes whose %mask element
; equals zero take the splat value; all other lanes keep the matching lane of
; %default. The FileCheck assertions expect vptestnmw on %ymm1 to produce %k1,
; then a vpbroadcastw from %edi into %ymm0 predicated on %k1 (no {z}).
449define <16 x i16> @test_masked_i16_to_16_mask0(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
450; CHECK-LABEL: test_masked_i16_to_16_mask0:
451; CHECK:       # %bb.0:
452; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
453; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
454; CHECK-NEXT:    retq
455  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
456  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
457  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
458  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
459  ret <16 x i16> %res
460}
461
; Zero-masked splat of %s to <16 x i16>: result lanes whose %mask element
; equals zero take the splat value; all other lanes are zero, because the
; select's false operand is zeroinitializer. The FileCheck assertions expect
; vptestnmw on %ymm0 to produce %k1, then vpbroadcastw with {%k1} {z}.
462define <16 x i16> @test_masked_z_i16_to_16_mask0(i16 %s, <16 x i16> %mask) {
463; CHECK-LABEL: test_masked_z_i16_to_16_mask0:
464; CHECK:       # %bb.0:
465; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
466; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
467; CHECK-NEXT:    retq
468  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
469  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
470  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
471  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
472  ret <16 x i16> %res
473}
; Merge-masked splat of %s to <16 x i16>: lanes where %mask == 0 take the
; splat value, the rest keep %default. The IR body is identical to
; test_masked_i16_to_16_mask0; only the function name differs. Expected code:
; vptestnmw on %ymm1 into %k1, then vpbroadcastw %edi into %ymm0 under %k1.
474define <16 x i16> @test_masked_i16_to_16_mask1(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
475; CHECK-LABEL: test_masked_i16_to_16_mask1:
476; CHECK:       # %bb.0:
477; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
478; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
479; CHECK-NEXT:    retq
480  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
481  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
482  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
483  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
484  ret <16 x i16> %res
485}
486
; Zero-masked splat of %s to <16 x i16>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i16_to_16_mask0; only the function name differs. Expected
; code: vptestnmw on %ymm0 into %k1, then vpbroadcastw with {%k1} {z}.
487define <16 x i16> @test_masked_z_i16_to_16_mask1(i16 %s, <16 x i16> %mask) {
488; CHECK-LABEL: test_masked_z_i16_to_16_mask1:
489; CHECK:       # %bb.0:
490; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
491; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
492; CHECK-NEXT:    retq
493  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
494  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
495  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
496  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
497  ret <16 x i16> %res
498}
; Merge-masked splat of %s to <16 x i16>: lanes where %mask == 0 take the
; splat value, the rest keep %default. The IR body is identical to
; test_masked_i16_to_16_mask0; only the function name differs. Expected code:
; vptestnmw on %ymm1 into %k1, then vpbroadcastw %edi into %ymm0 under %k1.
499define <16 x i16> @test_masked_i16_to_16_mask2(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
500; CHECK-LABEL: test_masked_i16_to_16_mask2:
501; CHECK:       # %bb.0:
502; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
503; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
504; CHECK-NEXT:    retq
505  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
506  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
507  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
508  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
509  ret <16 x i16> %res
510}
511
; Zero-masked splat of %s to <16 x i16>: lanes where %mask == 0 take the splat
; value, the rest are zero. The IR body is identical to
; test_masked_z_i16_to_16_mask0; only the function name differs. Expected
; code: vptestnmw on %ymm0 into %k1, then vpbroadcastw with {%k1} {z}.
512define <16 x i16> @test_masked_z_i16_to_16_mask2(i16 %s, <16 x i16> %mask) {
513; CHECK-LABEL: test_masked_z_i16_to_16_mask2:
514; CHECK:       # %bb.0:
515; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
516; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
517; CHECK-NEXT:    retq
518  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
519  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
520  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
521  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
522  ret <16 x i16> %res
523}
; Merge-masked splat of %s to <16 x i16>: lanes where %mask == 0 take the
; splat value, the rest keep %default. The IR body is identical to
; test_masked_i16_to_16_mask0; only the function name differs. Expected code:
; vptestnmw on %ymm1 into %k1, then vpbroadcastw %edi into %ymm0 under %k1.
524define <16 x i16> @test_masked_i16_to_16_mask3(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
525; CHECK-LABEL: test_masked_i16_to_16_mask3:
526; CHECK:       # %bb.0:
527; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
528; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
529; CHECK-NEXT:    retq
530  %vec = insertelement <2 x i16> undef, i16 %s, i32 0
531  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
532  %cmp = icmp eq <16 x i16> %mask, zeroinitializer
533  %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
534  ret <16 x i16> %res
535}
536
; Zero-masked splat of %s into <16 x i16>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastw with {%k1} {z}).
define <16 x i16> @test_masked_z_i16_to_16_mask3(i16 %s, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i16> %mask, zeroinitializer
  %zeroed = select <16 x i1> %is_zero, <16 x i16> %splat, <16 x i16> zeroinitializer
  ret <16 x i16> %zeroed
}
; Unmasked splat of the scalar %s across all 32 lanes of a <32 x i16>;
; the expected lowering is a single vpbroadcastw from the GPR into %zmm0.
define <32 x i16> @test_i16_to_32(i16 %s) {
; CHECK-LABEL: test_i16_to_32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  ; An all-zero shuffle mask replicates element 0 into every result lane.
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  ret <32 x i16> %splat
}
; Merge-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastw into %zmm0).
define <32 x i16> @test_masked_i16_to_32_mask0(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %merged = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %merged
}
570
; Zero-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastw with {%k1} {z}).
define <32 x i16> @test_masked_z_i16_to_32_mask0(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %zeroed = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %zeroed
}
; Merge-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastw into %zmm0).
define <32 x i16> @test_masked_i16_to_32_mask1(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %merged = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %merged
}
595
; Zero-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastw with {%k1} {z}).
define <32 x i16> @test_masked_z_i16_to_32_mask1(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %zeroed = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %zeroed
}
; Merge-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastw into %zmm0).
define <32 x i16> @test_masked_i16_to_32_mask2(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %merged = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %merged
}
620
; Zero-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastw with {%k1} {z}).
define <32 x i16> @test_masked_z_i16_to_32_mask2(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %zeroed = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %zeroed
}
; Merge-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastw into %zmm0).
define <32 x i16> @test_masked_i16_to_32_mask3(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %merged = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %merged
}
645
; Zero-masked splat of %s into <32 x i16>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastw with {%k1} {z}).
define <32 x i16> @test_masked_z_i16_to_32_mask3(i16 %s, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i16> undef, i16 %s, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i16> %mask, zeroinitializer
  %zeroed = select <32 x i1> %is_zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %zeroed
}
; Unmasked splat of the scalar %s across all 4 lanes of a <4 x i32>;
; the expected lowering is a single vpbroadcastd from the GPR into %xmm0.
define <4 x i32> @test_i32_to_4(i32 %s) {
; CHECK-LABEL: test_i32_to_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  ; An all-zero shuffle mask replicates element 0 into every result lane.
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
; Merge-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %xmm0).
define <4 x i32> @test_masked_i32_to_4_mask0(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %merged
}
679
; Zero-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <4 x i32> @test_masked_z_i32_to_4_mask0(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %zeroed
}
; Merge-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %xmm0).
define <4 x i32> @test_masked_i32_to_4_mask1(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %merged
}
704
; Zero-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <4 x i32> @test_masked_z_i32_to_4_mask1(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %zeroed
}
; Merge-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %xmm0).
define <4 x i32> @test_masked_i32_to_4_mask2(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %merged
}
729
; Zero-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <4 x i32> @test_masked_z_i32_to_4_mask2(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %zeroed
}
; Merge-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %xmm0).
define <4 x i32> @test_masked_i32_to_4_mask3(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %merged
}
754
; Zero-masked splat of %s into <4 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <4 x i32> @test_masked_z_i32_to_4_mask3(i32 %s, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i32> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %zeroed
}
; Unmasked splat of the scalar %s across all 8 lanes of a <8 x i32>;
; the expected lowering is a single vpbroadcastd from the GPR into %ymm0.
define <8 x i32> @test_i32_to_8(i32 %s) {
; CHECK-LABEL: test_i32_to_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  ; An all-zero shuffle mask replicates element 0 into every result lane.
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %splat
}
; Merge-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %ymm0).
define <8 x i32> @test_masked_i32_to_8_mask0(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %merged
}
788
; Zero-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <8 x i32> @test_masked_z_i32_to_8_mask0(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %zeroed
}
; Merge-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %ymm0).
define <8 x i32> @test_masked_i32_to_8_mask1(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %merged
}
813
; Zero-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <8 x i32> @test_masked_z_i32_to_8_mask1(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %zeroed
}
; Merge-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %ymm0).
define <8 x i32> @test_masked_i32_to_8_mask2(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %merged
}
838
; Zero-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <8 x i32> @test_masked_z_i32_to_8_mask2(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %zeroed
}
; Merge-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %ymm0).
define <8 x i32> @test_masked_i32_to_8_mask3(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %merged
}
863
; Zero-masked splat of %s into <8 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <8 x i32> @test_masked_z_i32_to_8_mask3(i32 %s, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i32> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %zeroed
}
; Unmasked splat of the scalar %s across all 16 lanes of a <16 x i32>;
; the expected lowering is a single vpbroadcastd from the GPR into %zmm0.
define <16 x i32> @test_i32_to_16(i32 %s) {
; CHECK-LABEL: test_i32_to_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  ; An all-zero shuffle mask replicates element 0 into every result lane.
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %splat
}
; Merge-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %zmm0).
define <16 x i32> @test_masked_i32_to_16_mask0(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %merged = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> %default
  ret <16 x i32> %merged
}
897
; Zero-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <16 x i32> @test_masked_z_i32_to_16_mask0(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
; Merge-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %zmm0).
define <16 x i32> @test_masked_i32_to_16_mask1(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %merged = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> %default
  ret <16 x i32> %merged
}
922
; Zero-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <16 x i32> @test_masked_z_i32_to_16_mask1(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
; Merge-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %zmm0).
define <16 x i32> @test_masked_i32_to_16_mask2(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %merged = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> %default
  ret <16 x i32> %merged
}
947
; Zero-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <16 x i32> @test_masked_z_i32_to_16_mask2(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
; Merge-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastd into %zmm0).
define <16 x i32> @test_masked_i32_to_16_mask3(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %merged = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> %default
  ret <16 x i32> %merged
}
972
; Zero-masked splat of %s into <16 x i32>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastd with {%k1} {z}).
define <16 x i32> @test_masked_z_i32_to_16_mask3(i32 %s, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i32> %mask, zeroinitializer
  %zeroed = select <16 x i1> %is_zero, <16 x i32> %splat, <16 x i32> zeroinitializer
  ret <16 x i32> %zeroed
}
; Unmasked splat of the scalar %s across both lanes of a <2 x i64>;
; the expected lowering is a single vpbroadcastq from the GPR into %xmm0.
define <2 x i64> @test_i64_to_2(i64 %s) {
; CHECK-LABEL: test_i64_to_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  ; An all-zero shuffle mask replicates element 0 into every result lane.
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}
; Merge-masked splat of %s into <2 x i64>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastq into %xmm0).
define <2 x i64> @test_masked_i64_to_2_mask0(i64 %s, <2 x i64> %default, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_2_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer
  %is_zero = icmp eq <2 x i64> %mask, zeroinitializer
  %merged = select <2 x i1> %is_zero, <2 x i64> %splat, <2 x i64> %default
  ret <2 x i64> %merged
}
1006
; Zero-masked splat of %s into <2 x i64>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastq with {%k1} {z}).
define <2 x i64> @test_masked_z_i64_to_2_mask0(i64 %s, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_2_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer
  %is_zero = icmp eq <2 x i64> %mask, zeroinitializer
  %zeroed = select <2 x i1> %is_zero, <2 x i64> %splat, <2 x i64> zeroinitializer
  ret <2 x i64> %zeroed
}
; Merge-masked splat of %s into <2 x i64>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastq into %xmm0).
define <2 x i64> @test_masked_i64_to_2_mask1(i64 %s, <2 x i64> %default, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_2_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer
  %is_zero = icmp eq <2 x i64> %mask, zeroinitializer
  %merged = select <2 x i1> %is_zero, <2 x i64> %splat, <2 x i64> %default
  ret <2 x i64> %merged
}
1031
; Zero-masked splat of %s into <2 x i64>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastq with {%k1} {z}).
define <2 x i64> @test_masked_z_i64_to_2_mask1(i64 %s, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_2_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer
  %is_zero = icmp eq <2 x i64> %mask, zeroinitializer
  %zeroed = select <2 x i1> %is_zero, <2 x i64> %splat, <2 x i64> zeroinitializer
  ret <2 x i64> %zeroed
}
; Unmasked splat of the scalar %s across all 4 lanes of a <4 x i64>;
; the expected lowering is a single vpbroadcastq from the GPR into %ymm0.
define <4 x i64> @test_i64_to_4(i64 %s) {
; CHECK-LABEL: test_i64_to_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  ; An all-zero shuffle mask replicates element 0 into every result lane.
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %splat
}
; Merge-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastq into %ymm0).
define <4 x i64> @test_masked_i64_to_4_mask0(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> %default
  ret <4 x i64> %merged
}
1065
; Zero-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastq with {%k1} {z}).
define <4 x i64> @test_masked_z_i64_to_4_mask0(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> zeroinitializer
  ret <4 x i64> %zeroed
}
; Merge-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastq into %ymm0).
define <4 x i64> @test_masked_i64_to_4_mask1(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> %default
  ret <4 x i64> %merged
}
1090
; Zero-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastq with {%k1} {z}).
define <4 x i64> @test_masked_z_i64_to_4_mask1(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> zeroinitializer
  ret <4 x i64> %zeroed
}
; Merge-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastq into %ymm0).
define <4 x i64> @test_masked_i64_to_4_mask2(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> %default
  ret <4 x i64> %merged
}
1115
; Zero-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes are cleared (vpbroadcastq with {%k1} {z}).
define <4 x i64> @test_masked_z_i64_to_4_mask2(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> zeroinitializer
  ret <4 x i64> %zeroed
}
; Merge-masked splat of %s into <4 x i64>: lanes whose %mask element is zero
; take %s, all other lanes keep %default (masked vpbroadcastq into %ymm0).
define <4 x i64> @test_masked_i64_to_4_mask3(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> %default
  ret <4 x i64> %merged
}
1140
; Zero-masked splat of the scalar %s to <4 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and is zeroed otherwise.
define <4 x i64> @test_masked_z_i64_to_4_mask3(i64 %s, <4 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_4_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <4 x i32> zeroinitializer
  %is_zero = icmp eq <4 x i64> %mask, zeroinitializer
  %out = select <4 x i1> %is_zero, <4 x i64> %splat, <4 x i64> zeroinitializer
  ret <4 x i64> %out
}
; Unmasked splat: the scalar %s is placed in lane 0 of a 2-element vector and
; that lane is replicated into all 8 lanes of the <8 x i64> result.
define <8 x i64> @test_i64_to_8(i64 %s) {
; CHECK-LABEL: test_i64_to_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %splat
}
; Merge-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and keeps %default otherwise.
define <8 x i64> @test_masked_i64_to_8_mask0(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> %default
  ret <8 x i64> %out
}
1174
; Zero-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and is zeroed otherwise.
define <8 x i64> @test_masked_z_i64_to_8_mask0(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}
; Merge-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and keeps %default otherwise.
define <8 x i64> @test_masked_i64_to_8_mask1(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> %default
  ret <8 x i64> %out
}
1199
; Zero-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and is zeroed otherwise.
define <8 x i64> @test_masked_z_i64_to_8_mask1(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}
; Merge-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and keeps %default otherwise.
define <8 x i64> @test_masked_i64_to_8_mask2(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> %default
  ret <8 x i64> %out
}
1224
; Zero-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and is zeroed otherwise.
define <8 x i64> @test_masked_z_i64_to_8_mask2(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}
; Merge-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and keeps %default otherwise.
define <8 x i64> @test_masked_i64_to_8_mask3(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_i64_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> %default
  ret <8 x i64> %out
}
1249
; Zero-masked splat of the scalar %s to <8 x i64>: a lane takes the broadcast
; value when its %mask element equals zero and is zeroed otherwise.
define <8 x i64> @test_masked_z_i64_to_8_mask3(i64 %s, <8 x i64> %mask) {
; CHECK-LABEL: test_masked_z_i64_to_8_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %lane0 = insertelement <2 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i64> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i64> %splat, <8 x i64> zeroinitializer
  ret <8 x i64> %out
}
; Unmasked splat from memory: the i8 loaded from %p is placed in lane 0 of a
; 2-element vector and replicated into all 16 lanes of the <16 x i8> result.
define <16 x i8> @test_i8_to_16_mem(i8* %p) {
; CHECK-LABEL: test_i8_to_16_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
; Merge-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <16 x i8> @test_masked_i8_to_16_mem_mask0(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> %default
  ret <16 x i8> %out
}
1285
; Zero-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <16 x i8> @test_masked_z_i8_to_16_mem_mask0(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> zeroinitializer
  ret <16 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <16 x i8> @test_masked_i8_to_16_mem_mask1(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> %default
  ret <16 x i8> %out
}
1312
; Zero-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <16 x i8> @test_masked_z_i8_to_16_mem_mask1(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> zeroinitializer
  ret <16 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <16 x i8> @test_masked_i8_to_16_mem_mask2(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> %default
  ret <16 x i8> %out
}
1339
; Zero-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <16 x i8> @test_masked_z_i8_to_16_mem_mask2(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> zeroinitializer
  ret <16 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <16 x i8> @test_masked_i8_to_16_mem_mask3(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_16_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> %default
  ret <16 x i8> %out
}
1366
; Zero-masked splat of the i8 loaded from %p to <16 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <16 x i8> @test_masked_z_i8_to_16_mem_mask3(i8* %p, <16 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <16 x i32> zeroinitializer
  %is_zero = icmp eq <16 x i8> %mask, zeroinitializer
  %out = select <16 x i1> %is_zero, <16 x i8> %splat, <16 x i8> zeroinitializer
  ret <16 x i8> %out
}
; Unmasked splat from memory: the i8 loaded from %p is placed in lane 0 of a
; 2-element vector and replicated into all 32 lanes of the <32 x i8> result.
define <32 x i8> @test_i8_to_32_mem(i8* %p) {
; CHECK-LABEL: test_i8_to_32_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %splat
}
; Merge-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <32 x i8> @test_masked_i8_to_32_mem_mask0(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> %default
  ret <32 x i8> %out
}
1403
; Zero-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <32 x i8> @test_masked_z_i8_to_32_mem_mask0(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> zeroinitializer
  ret <32 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <32 x i8> @test_masked_i8_to_32_mem_mask1(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> %default
  ret <32 x i8> %out
}
1430
; Zero-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <32 x i8> @test_masked_z_i8_to_32_mem_mask1(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> zeroinitializer
  ret <32 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <32 x i8> @test_masked_i8_to_32_mem_mask2(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> %default
  ret <32 x i8> %out
}
1457
; Zero-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <32 x i8> @test_masked_z_i8_to_32_mem_mask2(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> zeroinitializer
  ret <32 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <32 x i8> @test_masked_i8_to_32_mem_mask3(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> %default
  ret <32 x i8> %out
}
1484
; Zero-masked splat of the i8 loaded from %p to <32 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <32 x i8> @test_masked_z_i8_to_32_mem_mask3(i8* %p, <32 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <32 x i32> zeroinitializer
  %is_zero = icmp eq <32 x i8> %mask, zeroinitializer
  %out = select <32 x i1> %is_zero, <32 x i8> %splat, <32 x i8> zeroinitializer
  ret <32 x i8> %out
}
; Unmasked splat from memory: the i8 loaded from %p is placed in lane 0 of a
; 2-element vector and replicated into all 64 lanes of the <64 x i8> result.
define <64 x i8> @test_i8_to_64_mem(i8* %p) {
; CHECK-LABEL: test_i8_to_64_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %splat
}
; Merge-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <64 x i8> @test_masked_i8_to_64_mem_mask0(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> %default
  ret <64 x i8> %out
}
1521
; Zero-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <64 x i8> @test_masked_z_i8_to_64_mem_mask0(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <64 x i8> @test_masked_i8_to_64_mem_mask1(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> %default
  ret <64 x i8> %out
}
1548
; Zero-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <64 x i8> @test_masked_z_i8_to_64_mem_mask1(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <64 x i8> @test_masked_i8_to_64_mem_mask2(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> %default
  ret <64 x i8> %out
}
1575
; Zero-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <64 x i8> @test_masked_z_i8_to_64_mem_mask2(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}
; Merge-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <64 x i8> @test_masked_i8_to_64_mem_mask3(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_i8_to_64_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> %default
  ret <64 x i8> %out
}
1602
; Zero-masked splat of the i8 loaded from %p to <64 x i8>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <64 x i8> @test_masked_z_i8_to_64_mem_mask3(i8* %p, <64 x i8> %mask) {
; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i8, i8* %p
  %lane0 = insertelement <2 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <2 x i8> %lane0, <2 x i8> undef, <64 x i32> zeroinitializer
  %is_zero = icmp eq <64 x i8> %mask, zeroinitializer
  %out = select <64 x i1> %is_zero, <64 x i8> %splat, <64 x i8> zeroinitializer
  ret <64 x i8> %out
}
; Unmasked splat from memory: the i16 loaded from %p is placed in lane 0 of a
; 2-element vector and replicated into all 8 lanes of the <8 x i16> result.
define <8 x i16> @test_i16_to_8_mem(i16* %p) {
; CHECK-LABEL: test_i16_to_8_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %lane0 = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
; Merge-masked splat of the i16 loaded from %p to <8 x i16>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <8 x i16> @test_masked_i16_to_8_mem_mask0(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %lane0 = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i16> %splat, <8 x i16> %default
  ret <8 x i16> %out
}
1639
; Zero-masked splat of the i16 loaded from %p to <8 x i16>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <8 x i16> @test_masked_z_i16_to_8_mem_mask0(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %lane0 = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i16> %splat, <8 x i16> zeroinitializer
  ret <8 x i16> %out
}
; Merge-masked splat of the i16 loaded from %p to <8 x i16>: a lane takes the
; broadcast value when its %mask element equals zero, else keeps %default.
define <8 x i16> @test_masked_i16_to_8_mem_mask1(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %lane0 = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i16> %splat, <8 x i16> %default
  ret <8 x i16> %out
}
1666
; Zero-masked splat of the i16 loaded from %p to <8 x i16>: a lane takes the
; broadcast value when its %mask element equals zero, else is zeroed.
define <8 x i16> @test_masked_z_i16_to_8_mem_mask1(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %lane0 = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <8 x i32> zeroinitializer
  %is_zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is_zero, <8 x i16> %splat, <8 x i16> zeroinitializer
  ret <8 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <8 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <8 x i16> @test_masked_i16_to_8_mem_mask2(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i16> %splat, <8 x i16> %default
  ret <8 x i16> %out
}
1693
; Zero-masked splat of an i16 loaded from %p into <8 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <8 x i16> @test_masked_z_i16_to_8_mem_mask2(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i16> %splat, <8 x i16> zeroinitializer
  ret <8 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <8 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <8 x i16> @test_masked_i16_to_8_mem_mask3(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_8_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i16> %splat, <8 x i16> %default
  ret <8 x i16> %out
}
1720
; Zero-masked splat of an i16 loaded from %p into <8 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <8 x i16> @test_masked_z_i16_to_8_mem_mask3(i16* %p, <8 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i16> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i16> %splat, <8 x i16> zeroinitializer
  ret <8 x i16> %out
}
; Unmasked splat: load one i16 from %p and replicate it into every lane of a
; <16 x i16> result.
define <16 x i16> @test_i16_to_16_mem(i16* %p) {
; CHECK-LABEL: test_i16_to_16_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %splat
}
; Merge-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <16 x i16> @test_masked_i16_to_16_mem_mask0(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> %default
  ret <16 x i16> %out
}
1757
; Zero-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <16 x i16> @test_masked_z_i16_to_16_mem_mask0(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <16 x i16> @test_masked_i16_to_16_mem_mask1(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> %default
  ret <16 x i16> %out
}
1784
; Zero-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <16 x i16> @test_masked_z_i16_to_16_mem_mask1(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <16 x i16> @test_masked_i16_to_16_mem_mask2(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> %default
  ret <16 x i16> %out
}
1811
; Zero-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <16 x i16> @test_masked_z_i16_to_16_mem_mask2(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <16 x i16> @test_masked_i16_to_16_mem_mask3(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_16_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> %default
  ret <16 x i16> %out
}
1838
; Zero-masked splat of an i16 loaded from %p into <16 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <16 x i16> @test_masked_z_i16_to_16_mem_mask3(i16* %p, <16 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i16> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i16> %splat, <16 x i16> zeroinitializer
  ret <16 x i16> %out
}
; Unmasked splat: load one i16 from %p and replicate it into every lane of a
; <32 x i16> result.
define <32 x i16> @test_i16_to_32_mem(i16* %p) {
; CHECK-LABEL: test_i16_to_32_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i16> %splat
}
; Merge-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <32 x i16> @test_masked_i16_to_32_mem_mask0(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %out
}
1875
; Zero-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <32 x i16> @test_masked_z_i16_to_32_mem_mask0(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <32 x i16> @test_masked_i16_to_32_mem_mask1(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %out
}
1902
; Zero-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <32 x i16> @test_masked_z_i16_to_32_mem_mask1(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <32 x i16> @test_masked_i16_to_32_mem_mask2(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %out
}
1929
; Zero-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <32 x i16> @test_masked_z_i16_to_32_mem_mask2(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}
; Merge-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <32 x i16> @test_masked_i16_to_32_mem_mask3(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_i16_to_32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> %default
  ret <32 x i16> %out
}
1956
; Zero-masked splat of an i16 loaded from %p into <32 x i16>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <32 x i16> @test_masked_z_i16_to_32_mem_mask3(i16* %p, <32 x i16> %mask) {
; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i16, i16* %p
  %ins = insertelement <2 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <2 x i16> %ins, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <32 x i16> %mask, zeroinitializer
  %out = select <32 x i1> %is.zero, <32 x i16> %splat, <32 x i16> zeroinitializer
  ret <32 x i16> %out
}
; Unmasked splat: load one i32 from %p and replicate it into every lane of a
; <4 x i32> result.
define <4 x i32> @test_i32_to_4_mem(i32* %p) {
; CHECK-LABEL: test_i32_to_4_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %splat
}
; Merge-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <4 x i32> @test_masked_i32_to_4_mem_mask0(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %out
}
1993
; Zero-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <4 x i32> @test_masked_z_i32_to_4_mem_mask0(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <4 x i32> @test_masked_i32_to_4_mem_mask1(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %out
}
2020
; Zero-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <4 x i32> @test_masked_z_i32_to_4_mem_mask1(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <4 x i32> @test_masked_i32_to_4_mem_mask2(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %out
}
2047
; Zero-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <4 x i32> @test_masked_z_i32_to_4_mem_mask2(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <4 x i32> @test_masked_i32_to_4_mem_mask3(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_4_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> %default
  ret <4 x i32> %out
}
2074
; Zero-masked splat of an i32 loaded from %p into <4 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <4 x i32> @test_masked_z_i32_to_4_mem_mask3(i32* %p, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <4 x i32> %mask, zeroinitializer
  %out = select <4 x i1> %is.zero, <4 x i32> %splat, <4 x i32> zeroinitializer
  ret <4 x i32> %out
}
; Unmasked splat: load one i32 from %p and replicate it into every lane of an
; <8 x i32> result.
define <8 x i32> @test_i32_to_8_mem(i32* %p) {
; CHECK-LABEL: test_i32_to_8_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %splat
}
; Merge-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <8 x i32> @test_masked_i32_to_8_mem_mask0(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %out
}
2111
; Zero-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <8 x i32> @test_masked_z_i32_to_8_mem_mask0(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <8 x i32> @test_masked_i32_to_8_mem_mask1(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %out
}
2138
; Zero-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <8 x i32> @test_masked_z_i32_to_8_mem_mask1(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <8 x i32> @test_masked_i32_to_8_mem_mask2(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %out
}
2165
; Zero-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <8 x i32> @test_masked_z_i32_to_8_mem_mask2(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <8 x i32> @test_masked_i32_to_8_mem_mask3(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_8_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> %default
  ret <8 x i32> %out
}
2192
; Zero-masked splat of an i32 loaded from %p into <8 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <8 x i32> @test_masked_z_i32_to_8_mem_mask3(i32* %p, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <8 x i32> %mask, zeroinitializer
  %out = select <8 x i1> %is.zero, <8 x i32> %splat, <8 x i32> zeroinitializer
  ret <8 x i32> %out
}
; Unmasked splat: load one i32 from %p and replicate it into every lane of a
; <16 x i32> result.
define <16 x i32> @test_i32_to_16_mem(i32* %p) {
; CHECK-LABEL: test_i32_to_16_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i32> %splat
}
; Merge-masked splat of an i32 loaded from %p into <16 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <16 x i32> @test_masked_i32_to_16_mem_mask0(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i32> %splat, <16 x i32> %default
  ret <16 x i32> %out
}
2229
; Zero-masked splat of an i32 loaded from %p into <16 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <16 x i32> @test_masked_z_i32_to_16_mem_mask0(i32* %p, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i32> %splat, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p into <16 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others keep %default.
define <16 x i32> @test_masked_i32_to_16_mem_mask1(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_i32_to_16_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i32> %splat, <16 x i32> %default
  ret <16 x i32> %out
}
2256
; Zero-masked splat of an i32 loaded from %p into <16 x i32>: lanes whose
; %mask element equals zero receive the loaded value, all others become zero.
define <16 x i32> @test_masked_z_i32_to_16_mem_mask1(i32* %p, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %elt = load i32, i32* %p
  %ins = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %is.zero = icmp eq <16 x i32> %mask, zeroinitializer
  %out = select <16 x i1> %is.zero, <16 x i32> %splat, <16 x i32> zeroinitializer
  ret <16 x i32> %out
}
; Merge-masked splat of an i32 loaded from %p across <16 x i32>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmd writes %k1, then one merge-masked vpbroadcastd
; with the load folded into its memory operand.
; NOTE(review): the IR is the same as in the _mask1 and _mask3 variants of this
; test; only the function name differs.
2270define <16 x i32> @test_masked_i32_to_16_mem_mask2(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
2271; CHECK-LABEL: test_masked_i32_to_16_mem_mask2:
2272; CHECK:       # %bb.0:
2273; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
2274; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
2275; CHECK-NEXT:    retq
2276  %s = load i32, i32* %p
2277  %vec = insertelement <2 x i32> undef, i32 %s, i32 0
2278  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2279  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2280  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
2281  ret <16 x i32> %res
2282}
2283
; Zero-masked splat of an i32 loaded from %p across <16 x i32>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmd writes %k1, then one vpbroadcastd from memory
; with {%k1} {z}.
2284define <16 x i32> @test_masked_z_i32_to_16_mem_mask2(i32* %p, <16 x i32> %mask) {
2285; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask2:
2286; CHECK:       # %bb.0:
2287; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
2288; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
2289; CHECK-NEXT:    retq
2290  %s = load i32, i32* %p
2291  %vec = insertelement <2 x i32> undef, i32 %s, i32 0
2292  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2293  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2294  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
2295  ret <16 x i32> %res
2296}
; Merge-masked splat of an i32 loaded from %p across <16 x i32>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmd writes %k1, then one merge-masked vpbroadcastd
; with the load folded into its memory operand.
2297define <16 x i32> @test_masked_i32_to_16_mem_mask3(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
2298; CHECK-LABEL: test_masked_i32_to_16_mem_mask3:
2299; CHECK:       # %bb.0:
2300; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
2301; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
2302; CHECK-NEXT:    retq
2303  %s = load i32, i32* %p
2304  %vec = insertelement <2 x i32> undef, i32 %s, i32 0
2305  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2306  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2307  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
2308  ret <16 x i32> %res
2309}
2310
; Zero-masked splat of an i32 loaded from %p across <16 x i32>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmd writes %k1, then one vpbroadcastd from memory
; with {%k1} {z}.
2311define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p, <16 x i32> %mask) {
2312; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask3:
2313; CHECK:       # %bb.0:
2314; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
2315; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
2316; CHECK-NEXT:    retq
2317  %s = load i32, i32* %p
2318  %vec = insertelement <2 x i32> undef, i32 %s, i32 0
2319  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2320  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2321  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
2322  ret <16 x i32> %res
2323}
; Unmasked splat of an i64 loaded from %p across <2 x i64>.
; The scalar goes into lane 0 and an all-zero shuffle mask copies it to both
; lanes. The assertions expect one vmovddup reading memory (mem[0,0]) rather
; than an integer broadcast instruction.
2324define <2 x i64> @test_i64_to_2_mem(i64* %p) {
2325; CHECK-LABEL: test_i64_to_2_mem:
2326; CHECK:       # %bb.0:
2327; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
2328; CHECK-NEXT:    retq
2329  %s = load i64, i64* %p
2330  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2331  %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2332  ret <2 x i64> %res
2333}
; Merge-masked splat of an i64 loaded from %p across <2 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand (xmm destination).
2334define <2 x i64> @test_masked_i64_to_2_mem_mask0(i64* %p, <2 x i64> %default, <2 x i64> %mask) {
2335; CHECK-LABEL: test_masked_i64_to_2_mem_mask0:
2336; CHECK:       # %bb.0:
2337; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2338; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1}
2339; CHECK-NEXT:    retq
2340  %s = load i64, i64* %p
2341  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2342  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2343  %cmp = icmp eq <2 x i64> %mask, zeroinitializer
2344  %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
2345  ret <2 x i64> %res
2346}
2347
; Zero-masked splat of an i64 loaded from %p across <2 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z} (xmm destination).
2348define <2 x i64> @test_masked_z_i64_to_2_mem_mask0(i64* %p, <2 x i64> %mask) {
2349; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask0:
2350; CHECK:       # %bb.0:
2351; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
2352; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1} {z}
2353; CHECK-NEXT:    retq
2354  %s = load i64, i64* %p
2355  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2356  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2357  %cmp = icmp eq <2 x i64> %mask, zeroinitializer
2358  %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
2359  ret <2 x i64> %res
2360}
; Merge-masked splat of an i64 loaded from %p across <2 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
; NOTE(review): the IR is the same as in the _mask0 variant of this test; only
; the function name differs.
2361define <2 x i64> @test_masked_i64_to_2_mem_mask1(i64* %p, <2 x i64> %default, <2 x i64> %mask) {
2362; CHECK-LABEL: test_masked_i64_to_2_mem_mask1:
2363; CHECK:       # %bb.0:
2364; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
2365; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1}
2366; CHECK-NEXT:    retq
2367  %s = load i64, i64* %p
2368  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2369  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2370  %cmp = icmp eq <2 x i64> %mask, zeroinitializer
2371  %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
2372  ret <2 x i64> %res
2373}
2374
; Zero-masked splat of an i64 loaded from %p across <2 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2375define <2 x i64> @test_masked_z_i64_to_2_mem_mask1(i64* %p, <2 x i64> %mask) {
2376; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask1:
2377; CHECK:       # %bb.0:
2378; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
2379; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1} {z}
2380; CHECK-NEXT:    retq
2381  %s = load i64, i64* %p
2382  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2383  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
2384  %cmp = icmp eq <2 x i64> %mask, zeroinitializer
2385  %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
2386  ret <2 x i64> %res
2387}
; Unmasked splat of an i64 loaded from %p across <4 x i64>.
; The scalar goes into lane 0 of a 2-element vector and an all-zero shuffle
; mask widens it to four lanes. The assertions expect one vbroadcastsd with a
; memory operand into %ymm0.
2388define <4 x i64> @test_i64_to_4_mem(i64* %p) {
2389; CHECK-LABEL: test_i64_to_4_mem:
2390; CHECK:       # %bb.0:
2391; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
2392; CHECK-NEXT:    retq
2393  %s = load i64, i64* %p
2394  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2395  %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2396  ret <4 x i64> %res
2397}
; Merge-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand (ymm destination).
2398define <4 x i64> @test_masked_i64_to_4_mem_mask0(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
2399; CHECK-LABEL: test_masked_i64_to_4_mem_mask0:
2400; CHECK:       # %bb.0:
2401; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2402; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
2403; CHECK-NEXT:    retq
2404  %s = load i64, i64* %p
2405  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2406  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2407  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2408  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
2409  ret <4 x i64> %res
2410}
2411
; Zero-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z} (ymm destination).
2412define <4 x i64> @test_masked_z_i64_to_4_mem_mask0(i64* %p, <4 x i64> %mask) {
2413; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask0:
2414; CHECK:       # %bb.0:
2415; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
2416; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
2417; CHECK-NEXT:    retq
2418  %s = load i64, i64* %p
2419  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2420  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2421  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2422  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
2423  ret <4 x i64> %res
2424}
; Merge-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
; NOTE(review): the IR is the same as in the _mask0, _mask2 and _mask3
; variants of this test; only the function name differs.
2425define <4 x i64> @test_masked_i64_to_4_mem_mask1(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
2426; CHECK-LABEL: test_masked_i64_to_4_mem_mask1:
2427; CHECK:       # %bb.0:
2428; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2429; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
2430; CHECK-NEXT:    retq
2431  %s = load i64, i64* %p
2432  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2433  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2434  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2435  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
2436  ret <4 x i64> %res
2437}
2438
; Zero-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2439define <4 x i64> @test_masked_z_i64_to_4_mem_mask1(i64* %p, <4 x i64> %mask) {
2440; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask1:
2441; CHECK:       # %bb.0:
2442; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
2443; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
2444; CHECK-NEXT:    retq
2445  %s = load i64, i64* %p
2446  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2447  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2448  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2449  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
2450  ret <4 x i64> %res
2451}
; Merge-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
2452define <4 x i64> @test_masked_i64_to_4_mem_mask2(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
2453; CHECK-LABEL: test_masked_i64_to_4_mem_mask2:
2454; CHECK:       # %bb.0:
2455; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2456; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
2457; CHECK-NEXT:    retq
2458  %s = load i64, i64* %p
2459  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2460  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2461  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2462  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
2463  ret <4 x i64> %res
2464}
2465
; Zero-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2466define <4 x i64> @test_masked_z_i64_to_4_mem_mask2(i64* %p, <4 x i64> %mask) {
2467; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask2:
2468; CHECK:       # %bb.0:
2469; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
2470; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
2471; CHECK-NEXT:    retq
2472  %s = load i64, i64* %p
2473  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2474  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2475  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2476  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
2477  ret <4 x i64> %res
2478}
; Merge-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
2479define <4 x i64> @test_masked_i64_to_4_mem_mask3(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
2480; CHECK-LABEL: test_masked_i64_to_4_mem_mask3:
2481; CHECK:       # %bb.0:
2482; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
2483; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
2484; CHECK-NEXT:    retq
2485  %s = load i64, i64* %p
2486  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2487  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2488  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2489  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
2490  ret <4 x i64> %res
2491}
2492
; Zero-masked splat of an i64 loaded from %p across <4 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2493define <4 x i64> @test_masked_z_i64_to_4_mem_mask3(i64* %p, <4 x i64> %mask) {
2494; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask3:
2495; CHECK:       # %bb.0:
2496; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
2497; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
2498; CHECK-NEXT:    retq
2499  %s = load i64, i64* %p
2500  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2501  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
2502  %cmp = icmp eq <4 x i64> %mask, zeroinitializer
2503  %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
2504  ret <4 x i64> %res
2505}
; Unmasked splat of an i64 loaded from %p across <8 x i64>.
; The scalar goes into lane 0 of a 2-element vector and an all-zero shuffle
; mask widens it to eight lanes. The assertions expect one vbroadcastsd with a
; memory operand into %zmm0.
2506define <8 x i64> @test_i64_to_8_mem(i64* %p) {
2507; CHECK-LABEL: test_i64_to_8_mem:
2508; CHECK:       # %bb.0:
2509; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
2510; CHECK-NEXT:    retq
2511  %s = load i64, i64* %p
2512  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2513  %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2514  ret <8 x i64> %res
2515}
; Merge-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand (zmm destination).
2516define <8 x i64> @test_masked_i64_to_8_mem_mask0(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
2517; CHECK-LABEL: test_masked_i64_to_8_mem_mask0:
2518; CHECK:       # %bb.0:
2519; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2520; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
2521; CHECK-NEXT:    retq
2522  %s = load i64, i64* %p
2523  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2524  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2525  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2526  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
2527  ret <8 x i64> %res
2528}
2529
; Zero-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z} (zmm destination).
2530define <8 x i64> @test_masked_z_i64_to_8_mem_mask0(i64* %p, <8 x i64> %mask) {
2531; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask0:
2532; CHECK:       # %bb.0:
2533; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2534; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
2535; CHECK-NEXT:    retq
2536  %s = load i64, i64* %p
2537  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2538  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2539  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2540  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
2541  ret <8 x i64> %res
2542}
; Merge-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
; NOTE(review): the IR is the same as in the _mask0, _mask2 and _mask3
; variants of this test; only the function name differs.
2543define <8 x i64> @test_masked_i64_to_8_mem_mask1(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
2544; CHECK-LABEL: test_masked_i64_to_8_mem_mask1:
2545; CHECK:       # %bb.0:
2546; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2547; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
2548; CHECK-NEXT:    retq
2549  %s = load i64, i64* %p
2550  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2551  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2552  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2553  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
2554  ret <8 x i64> %res
2555}
2556
; Zero-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2557define <8 x i64> @test_masked_z_i64_to_8_mem_mask1(i64* %p, <8 x i64> %mask) {
2558; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask1:
2559; CHECK:       # %bb.0:
2560; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2561; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
2562; CHECK-NEXT:    retq
2563  %s = load i64, i64* %p
2564  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2565  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2566  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2567  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
2568  ret <8 x i64> %res
2569}
; Merge-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
2570define <8 x i64> @test_masked_i64_to_8_mem_mask2(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
2571; CHECK-LABEL: test_masked_i64_to_8_mem_mask2:
2572; CHECK:       # %bb.0:
2573; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2574; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
2575; CHECK-NEXT:    retq
2576  %s = load i64, i64* %p
2577  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2578  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2579  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2580  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
2581  ret <8 x i64> %res
2582}
2583
; Zero-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2584define <8 x i64> @test_masked_z_i64_to_8_mem_mask2(i64* %p, <8 x i64> %mask) {
2585; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask2:
2586; CHECK:       # %bb.0:
2587; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2588; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
2589; CHECK-NEXT:    retq
2590  %s = load i64, i64* %p
2591  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2592  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2593  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2594  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
2595  ret <8 x i64> %res
2596}
; Merge-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane keeps the matching %default element.
; Expected codegen: vptestnmq writes %k1, then one merge-masked vpbroadcastq
; with the load folded into its memory operand.
2597define <8 x i64> @test_masked_i64_to_8_mem_mask3(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
2598; CHECK-LABEL: test_masked_i64_to_8_mem_mask3:
2599; CHECK:       # %bb.0:
2600; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
2601; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
2602; CHECK-NEXT:    retq
2603  %s = load i64, i64* %p
2604  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2605  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2606  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2607  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
2608  ret <8 x i64> %res
2609}
2610
; Zero-masked splat of an i64 loaded from %p across <8 x i64>.
; Lanes whose %mask element equals zero take the loaded value; every other
; lane is zero.
; Expected codegen: vptestnmq writes %k1, then one vpbroadcastq from memory
; with {%k1} {z}.
2611define <8 x i64> @test_masked_z_i64_to_8_mem_mask3(i64* %p, <8 x i64> %mask) {
2612; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask3:
2613; CHECK:       # %bb.0:
2614; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
2615; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
2616; CHECK-NEXT:    retq
2617  %s = load i64, i64* %p
2618  %vec = insertelement <2 x i64> undef, i64 %s, i32 0
2619  %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2620  %cmp = icmp eq <8 x i64> %mask, zeroinitializer
2621  %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
2622  ret <8 x i64> %res
2623}
2624