1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
3
; Unmasked, register operands: shuffle mask <0,4,1,5> interleaves elements 0-1 of
; %vec1 with elements 0-1 of %vec2. The assertions expect a single vunpcklps.
4define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) {
5; CHECK-LABEL: test_4xfloat_unpack_low_mask0:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8; CHECK-NEXT:    retq
9  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
10  ret <4 x float> %res
11}
; Merge-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes keep %vec3.
; The assertions expect the compare result in %k1 to predicate vunpcklps.
12define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
13; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0:
14; CHECK:       # %bb.0:
15; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
16; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
17; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
18; CHECK-NEXT:    vmovaps %xmm2, %xmm0
19; CHECK-NEXT:    retq
20  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
22  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
23  ret <4 x float> %res
24}
25
; Zero-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes are zero.
; The assertions expect vunpcklps predicated with {%k1} {z}.
26define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
27; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
30; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
31; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
32; CHECK-NEXT:    retq
33  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
34  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
35  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
36  ret <4 x float> %res
37}
; Merge-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
38define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
39; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
42; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
43; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
44; CHECK-NEXT:    vmovaps %xmm2, %xmm0
45; CHECK-NEXT:    retq
46  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
47  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
48  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
49  ret <4 x float> %res
50}
51
; Zero-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
52define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
53; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
54; CHECK:       # %bb.0:
55; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
56; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
57; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
58; CHECK-NEXT:    retq
59  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
60  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
61  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
62  ret <4 x float> %res
63}
; Merge-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
64define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
65; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2:
66; CHECK:       # %bb.0:
67; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
68; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
69; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
70; CHECK-NEXT:    vmovaps %xmm2, %xmm0
71; CHECK-NEXT:    retq
72  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
73  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
74  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
75  ret <4 x float> %res
76}
77
; Zero-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
78define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
79; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
82; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
83; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
84; CHECK-NEXT:    retq
85  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
86  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
87  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
88  ret <4 x float> %res
89}
; Unmasked, register operands: shuffle mask <0,4,1,5> interleaves elements 0-1 of
; %vec1 with elements 0-1 of %vec2. The assertions expect a single vunpcklps.
; The IR body matches the _mask0 variant of this test; only the name differs.
90define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) {
91; CHECK-LABEL: test_4xfloat_unpack_low_mask3:
92; CHECK:       # %bb.0:
93; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
94; CHECK-NEXT:    retq
95  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
96  ret <4 x float> %res
97}
; Merge-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
98define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
99; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3:
100; CHECK:       # %bb.0:
101; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
102; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
103; CHECK-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
104; CHECK-NEXT:    vmovaps %xmm2, %xmm0
105; CHECK-NEXT:    retq
106  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
107  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
108  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
109  ret <4 x float> %res
110}
111
; Zero-masked, register operands: lanes where %mask compares ordered-equal to
; zero take the <0,4,1,5> interleave of %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
112define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
113; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
116; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
117; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
118; CHECK-NEXT:    retq
119  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
120  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
121  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
122  ret <4 x float> %res
123}
; Unmasked, memory operand: %vec2 is loaded from %vec2p and interleaved with
; %vec1 using shuffle mask <0,4,1,5>. The assertions expect no separate load:
; the second source of vunpcklps is printed as mem[...].
124define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
125; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask0:
126; CHECK:       # %bb.0:
127; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
128; CHECK-NEXT:    retq
129  %vec2 = load <4 x float>, <4 x float>* %vec2p
130  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
131  ret <4 x float> %res
132}
; Merge-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes keep %vec3. The assertions expect the load folded into a
; {%k1}-predicated vunpcklps (second source printed as mem[...]).
133define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
134; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
135; CHECK:       # %bb.0:
136; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
137; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
138; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
139; CHECK-NEXT:    vmovaps %xmm1, %xmm0
140; CHECK-NEXT:    retq
141  %vec2 = load <4 x float>, <4 x float>* %vec2p
142  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
143  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
144  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
145  ret <4 x float> %res
146}
147
; Zero-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes are zero. The assertions expect the load folded into a
; {%k1} {z}-predicated vunpcklps (second source printed as mem[...]).
148define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
149; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
150; CHECK:       # %bb.0:
151; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
152; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
153; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
154; CHECK-NEXT:    retq
155  %vec2 = load <4 x float>, <4 x float>* %vec2p
156  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
157  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
158  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
159  ret <4 x float> %res
160}
161
; Merge-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
162define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
163; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
164; CHECK:       # %bb.0:
165; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
166; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
167; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
168; CHECK-NEXT:    vmovaps %xmm1, %xmm0
169; CHECK-NEXT:    retq
170  %vec2 = load <4 x float>, <4 x float>* %vec2p
171  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
172  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
173  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
174  ret <4 x float> %res
175}
176
; Zero-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
177define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
178; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
181; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
182; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
183; CHECK-NEXT:    retq
184  %vec2 = load <4 x float>, <4 x float>* %vec2p
185  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
186  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
187  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
188  ret <4 x float> %res
189}
190
; Merge-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
191define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
192; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
193; CHECK:       # %bb.0:
194; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
195; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
196; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
197; CHECK-NEXT:    vmovaps %xmm1, %xmm0
198; CHECK-NEXT:    retq
199  %vec2 = load <4 x float>, <4 x float>* %vec2p
200  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
201  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
202  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
203  ret <4 x float> %res
204}
205
; Zero-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
206define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
207; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
208; CHECK:       # %bb.0:
209; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
210; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
211; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
212; CHECK-NEXT:    retq
213  %vec2 = load <4 x float>, <4 x float>* %vec2p
214  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
215  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
216  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
217  ret <4 x float> %res
218}
219
; Unmasked, memory operand: %vec2 is loaded from %vec2p and interleaved with
; %vec1 using shuffle mask <0,4,1,5>. The assertions expect the load folded
; into vunpcklps (second source printed as mem[...]).
; The IR body matches the _mask0 variant of this test; only the name differs.
220define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
221; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask3:
222; CHECK:       # %bb.0:
223; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
224; CHECK-NEXT:    retq
225  %vec2 = load <4 x float>, <4 x float>* %vec2p
226  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
227  ret <4 x float> %res
228}
; Merge-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
229define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
230; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
231; CHECK:       # %bb.0:
232; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
233; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
234; CHECK-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
235; CHECK-NEXT:    vmovaps %xmm1, %xmm0
236; CHECK-NEXT:    retq
237  %vec2 = load <4 x float>, <4 x float>* %vec2p
238  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
239  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
240  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
241  ret <4 x float> %res
242}
243
; Zero-masked, memory operand: %vec2 is loaded from %vec2p. Lanes where %mask
; compares ordered-equal to zero take the <0,4,1,5> interleave of %vec1/%vec2;
; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
244define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
245; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
246; CHECK:       # %bb.0:
247; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
248; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
249; CHECK-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
250; CHECK-NEXT:    retq
251  %vec2 = load <4 x float>, <4 x float>* %vec2p
252  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
253  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
254  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
255  ret <4 x float> %res
256}
257
; Unmasked, register operands, 8 x float: shuffle mask <0,8,1,9,4,12,5,13>
; interleaves elements 0-1 and 4-5 of %vec1 with the same elements of %vec2.
; The assertions expect a single ymm vunpcklps.
258define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) {
259; CHECK-LABEL: test_8xfloat_unpack_low_mask0:
260; CHECK:       # %bb.0:
261; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
262; CHECK-NEXT:    retq
263  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
264  ret <8 x float> %res
265}
; Merge-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes keep %vec3. The assertions expect the compare result in %k1
; to predicate a ymm vunpcklps.
266define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
267; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0:
268; CHECK:       # %bb.0:
269; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
270; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
271; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
272; CHECK-NEXT:    vmovaps %ymm2, %ymm0
273; CHECK-NEXT:    retq
274  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
275  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
276  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
277  ret <8 x float> %res
278}
279
; Zero-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes are zero. The assertions expect a ymm vunpcklps predicated
; with {%k1} {z}.
280define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
281; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
282; CHECK:       # %bb.0:
283; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
284; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
285; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
286; CHECK-NEXT:    retq
287  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
288  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
289  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
290  ret <8 x float> %res
291}
; Merge-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
292define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
293; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1:
294; CHECK:       # %bb.0:
295; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
296; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
297; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
298; CHECK-NEXT:    vmovaps %ymm2, %ymm0
299; CHECK-NEXT:    retq
300  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
301  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
302  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
303  ret <8 x float> %res
304}
305
; Zero-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
306define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
307; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
308; CHECK:       # %bb.0:
309; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
310; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
311; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
312; CHECK-NEXT:    retq
313  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
314  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
315  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
316  ret <8 x float> %res
317}
; Merge-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
318define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
319; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2:
320; CHECK:       # %bb.0:
321; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
322; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
323; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
324; CHECK-NEXT:    vmovaps %ymm2, %ymm0
325; CHECK-NEXT:    retq
326  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
327  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
328  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
329  ret <8 x float> %res
330}
331
; Zero-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
332define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
333; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
336; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
337; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
338; CHECK-NEXT:    retq
339  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
340  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
341  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
342  ret <8 x float> %res
343}
; Unmasked, register operands, 8 x float: shuffle mask <0,8,1,9,4,12,5,13>
; interleaves elements 0-1 and 4-5 of %vec1 with the same elements of %vec2.
; The IR body matches the _mask0 variant of this test; only the name differs.
344define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) {
345; CHECK-LABEL: test_8xfloat_unpack_low_mask3:
346; CHECK:       # %bb.0:
347; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
348; CHECK-NEXT:    retq
349  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
350  ret <8 x float> %res
351}
; Merge-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
352define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
353; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3:
354; CHECK:       # %bb.0:
355; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
356; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
357; CHECK-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
358; CHECK-NEXT:    vmovaps %ymm2, %ymm0
359; CHECK-NEXT:    retq
360  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
361  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
362  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
363  ret <8 x float> %res
364}
365
; Zero-masked, register operands, 8 x float: lanes where %mask compares
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> interleave of %vec1/%vec2;
; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
366define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
367; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
368; CHECK:       # %bb.0:
369; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
370; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
371; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
372; CHECK-NEXT:    retq
373  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
374  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
375  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
376  ret <8 x float> %res
377}
; Unmasked, memory operand, 8 x float: %vec2 is loaded from %vec2p and
; interleaved with %vec1 using shuffle mask <0,8,1,9,4,12,5,13>. The assertions
; expect no separate load: the second source of vunpcklps is printed as mem[...].
378define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
379; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask0:
380; CHECK:       # %bb.0:
381; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
382; CHECK-NEXT:    retq
383  %vec2 = load <8 x float>, <8 x float>* %vec2p
384  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
385  ret <8 x float> %res
386}
; Merge-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes keep %vec3. The assertions expect
; the load folded into a {%k1}-predicated vunpcklps (second source mem[...]).
387define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
388; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
389; CHECK:       # %bb.0:
390; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
391; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
392; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
393; CHECK-NEXT:    vmovaps %ymm1, %ymm0
394; CHECK-NEXT:    retq
395  %vec2 = load <8 x float>, <8 x float>* %vec2p
396  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
397  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
398  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
399  ret <8 x float> %res
400}
401
; Zero-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes are zero. The assertions expect
; the load folded into a {%k1} {z}-predicated vunpcklps (second source mem[...]).
402define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
403; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
404; CHECK:       # %bb.0:
405; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
406; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
407; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
408; CHECK-NEXT:    retq
409  %vec2 = load <8 x float>, <8 x float>* %vec2p
410  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
411  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
412  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
413  ret <8 x float> %res
414}
415
; Merge-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
416define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
417; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
418; CHECK:       # %bb.0:
419; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
420; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
421; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
422; CHECK-NEXT:    vmovaps %ymm1, %ymm0
423; CHECK-NEXT:    retq
424  %vec2 = load <8 x float>, <8 x float>* %vec2p
425  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
426  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
427  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
428  ret <8 x float> %res
429}
430
; Zero-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
431define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
432; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
433; CHECK:       # %bb.0:
434; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
435; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
436; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
437; CHECK-NEXT:    retq
438  %vec2 = load <8 x float>, <8 x float>* %vec2p
439  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
440  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
441  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
442  ret <8 x float> %res
443}
444
; Merge-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
445define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
446; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
447; CHECK:       # %bb.0:
448; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
449; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
450; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
451; CHECK-NEXT:    vmovaps %ymm1, %ymm0
452; CHECK-NEXT:    retq
453  %vec2 = load <8 x float>, <8 x float>* %vec2p
454  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
455  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
456  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
457  ret <8 x float> %res
458}
459
; Zero-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
460define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
461; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
464; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
465; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
466; CHECK-NEXT:    retq
467  %vec2 = load <8 x float>, <8 x float>* %vec2p
468  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
469  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
470  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
471  ret <8 x float> %res
472}
473
; Unmasked, memory operand, 8 x float: %vec2 is loaded from %vec2p and
; interleaved with %vec1 using shuffle mask <0,8,1,9,4,12,5,13>. The assertions
; expect the load folded into vunpcklps (second source printed as mem[...]).
; The IR body matches the _mask0 variant of this test; only the name differs.
474define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
475; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask3:
476; CHECK:       # %bb.0:
477; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
478; CHECK-NEXT:    retq
479  %vec2 = load <8 x float>, <8 x float>* %vec2p
480  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
481  ret <8 x float> %res
482}
; Merge-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
483define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
484; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
485; CHECK:       # %bb.0:
486; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
487; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
488; CHECK-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
489; CHECK-NEXT:    vmovaps %ymm1, %ymm0
490; CHECK-NEXT:    retq
491  %vec2 = load <8 x float>, <8 x float>* %vec2p
492  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
493  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
494  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
495  ret <8 x float> %res
496}
497
; Zero-masked, memory operand, 8 x float: %vec2 is loaded from %vec2p. Lanes
; where %mask compares ordered-equal to zero take the <0,8,1,9,4,12,5,13>
; interleave of %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
498define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
499; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
502; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
503; CHECK-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
504; CHECK-NEXT:    retq
505  %vec2 = load <8 x float>, <8 x float>* %vec2p
506  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
507  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
508  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
509  ret <8 x float> %res
510}
511
; Unmasked, register operands, 16 x float: the shuffle mask interleaves elements
; 0-1, 4-5, 8-9 and 12-13 of %vec1 with the same elements of %vec2. The
; assertions expect a single zmm vunpcklps.
512define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) {
513; CHECK-LABEL: test_16xfloat_unpack_low_mask0:
514; CHECK:       # %bb.0:
515; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
516; CHECK-NEXT:    retq
517  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
518  ret <16 x float> %res
519}
; Merge-masked, register operands, 16 x float: lanes where %mask compares
; ordered-equal to zero take the interleave of elements 0-1, 4-5, 8-9, 12-13 of
; %vec1/%vec2; all other lanes keep %vec3. The assertions expect the compare
; result in %k1 to predicate a zmm vunpcklps.
520define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
521; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask0:
522; CHECK:       # %bb.0:
523; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
524; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
525; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
526; CHECK-NEXT:    vmovaps %zmm2, %zmm0
527; CHECK-NEXT:    retq
528  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
529  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
530  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
531  ret <16 x float> %res
532}
533
; Zero-masked, register operands, 16 x float: lanes where %mask compares
; ordered-equal to zero take the interleave of elements 0-1, 4-5, 8-9, 12-13 of
; %vec1/%vec2; all other lanes are zero. The assertions expect a zmm vunpcklps
; predicated with {%k1} {z}.
534define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
535; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
536; CHECK:       # %bb.0:
537; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
538; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
539; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
540; CHECK-NEXT:    retq
541  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
542  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
543  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
544  ret <16 x float> %res
545}
; Merge-masked, register operands, 16 x float: lanes where %mask compares
; ordered-equal to zero take the interleave of elements 0-1, 4-5, 8-9, 12-13 of
; %vec1/%vec2; all other lanes keep %vec3.
; The IR body matches the _mask0 variant of this test; only the name differs.
546define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
547; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask1:
548; CHECK:       # %bb.0:
549; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
550; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
551; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
552; CHECK-NEXT:    vmovaps %zmm2, %zmm0
553; CHECK-NEXT:    retq
554  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
555  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
556  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
557  ret <16 x float> %res
558}
559
; Zero-masked, register operands, 16 x float: lanes where %mask compares
; ordered-equal to zero take the interleave of elements 0-1, 4-5, 8-9, 12-13 of
; %vec1/%vec2; all other lanes are zero.
; The IR body matches the _mask0 variant of this test; only the name differs.
560define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
561; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
562; CHECK:       # %bb.0:
563; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
564; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
565; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
566; CHECK-NEXT:    retq
567  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
568  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
569  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
570  ret <16 x float> %res
571}
; Merge-masked low unpack of two <16 x float> registers. The shuffle interleaves
; floats 0 and 1 of every 4-element group of %vec1 and %vec2; lanes where %mask
; is not ordered-equal to 0.0 are filled from %vec3.
; Expected assembly: vunpcklps under {%k1} into zmm2, then vmovaps zmm2 -> zmm0.
572define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
573; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask2:
574; CHECK:       # %bb.0:
575; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
576; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
577; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
578; CHECK-NEXT:    vmovaps %zmm2, %zmm0
579; CHECK-NEXT:    retq
580  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
581  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
582  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
583  ret <16 x float> %res
584}
585
; Zero-masked low unpack of two <16 x float> registers: interleave floats 0 and 1
; of each 4-element group of %vec1 and %vec2, then zero every lane whose %mask
; element is not ordered-equal to 0.0.
; Expected assembly: vxorps + vcmpeqps set %k1; vunpcklps {%k1} {z} writes zmm0.
586define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
587; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
588; CHECK:       # %bb.0:
589; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
590; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
591; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
592; CHECK-NEXT:    retq
593  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
594  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
595  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
596  ret <16 x float> %res
597}
; Unmasked low unpack of two <16 x float> registers: floats 0 and 1 of each
; 4-element group of %vec1 and %vec2 are interleaved, with no select afterwards.
; Expected assembly: a single vunpcklps on zmm0/zmm1.
598define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) {
599; CHECK-LABEL: test_16xfloat_unpack_low_mask3:
600; CHECK:       # %bb.0:
601; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
602; CHECK-NEXT:    retq
603  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
604  ret <16 x float> %res
605}
; Merge-masked low unpack of two <16 x float> registers: interleave floats 0 and
; 1 of each 4-element group of %vec1 and %vec2, and use the %vec3 lane wherever
; the %mask element is not ordered-equal to 0.0.
; Expected assembly: vxorps + vcmpeqps set %k1; vunpcklps {%k1} writes zmm2,
; which vmovaps then copies into zmm0.
606define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
607; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask3:
608; CHECK:       # %bb.0:
609; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
610; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
611; CHECK-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
612; CHECK-NEXT:    vmovaps %zmm2, %zmm0
613; CHECK-NEXT:    retq
614  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
615  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
616  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
617  ret <16 x float> %res
618}
619
; Zero-masked low unpack of two <16 x float> registers: the interleave of floats
; 0 and 1 from each 4-element group of %vec1/%vec2 is kept only in lanes where
; %mask is ordered-equal to 0.0; all other lanes become zero.
; Expected assembly: a single vunpcklps with {%k1} {z} after vxorps + vcmpeqps.
620define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
621; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
622; CHECK:       # %bb.0:
623; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
624; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
625; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
626; CHECK-NEXT:    retq
627  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
628  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
629  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
630  ret <16 x float> %res
631}
; Unmasked low unpack of a <16 x float> register with a vector loaded from
; %vec2p: floats 0 and 1 of each 4-element group of both inputs are interleaved.
; Expected assembly: one vunpcklps whose second source is a memory operand
; (mem[...]), i.e. no separate load instruction.
632define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
633; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask0:
634; CHECK:       # %bb.0:
635; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
636; CHECK-NEXT:    retq
637  %vec2 = load <16 x float>, <16 x float>* %vec2p
638  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
639  ret <16 x float> %res
640}
; Merge-masked low unpack of a <16 x float> register with a vector loaded from
; %vec2p: floats 0 and 1 of each 4-element group are interleaved, and lanes where
; %mask is not ordered-equal to 0.0 take the matching lane of %vec3.
; Expected assembly: vunpcklps under {%k1} with a memory source writes zmm1,
; then vmovaps copies zmm1 to zmm0.
641define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
642; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
643; CHECK:       # %bb.0:
644; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
645; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
646; CHECK-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
647; CHECK-NEXT:    vmovaps %zmm1, %zmm0
648; CHECK-NEXT:    retq
649  %vec2 = load <16 x float>, <16 x float>* %vec2p
650  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
651  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
652  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
653  ret <16 x float> %res
654}
655
; Zero-masked low unpack of a <16 x float> register with a vector loaded from
; %vec2p: floats 0 and 1 of each 4-element group are interleaved, and lanes where
; %mask is not ordered-equal to 0.0 are zeroed.
; Expected assembly: one vunpcklps with {%k1} {z} and a memory source operand.
656define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
657; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
658; CHECK:       # %bb.0:
659; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
660; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
661; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
662; CHECK-NEXT:    retq
663  %vec2 = load <16 x float>, <16 x float>* %vec2p
664  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
665  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
666  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
667  ret <16 x float> %res
668}
669
; Merge-masked low unpack, <16 x float>, second input loaded from %vec2p. The
; shuffle interleaves floats 0 and 1 of each 4-element group; the select falls
; back to %vec3 in lanes where %mask is not ordered-equal to 0.0.
; Expected assembly: vunpcklps {%k1} with a mem[...] source into zmm1, followed
; by vmovaps zmm1 -> zmm0.
670define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
671; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
672; CHECK:       # %bb.0:
673; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
674; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
675; CHECK-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
676; CHECK-NEXT:    vmovaps %zmm1, %zmm0
677; CHECK-NEXT:    retq
678  %vec2 = load <16 x float>, <16 x float>* %vec2p
679  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
680  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
681  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
682  ret <16 x float> %res
683}
684
; Zero-masked low unpack, <16 x float>, second input loaded from %vec2p. The
; shuffle interleaves floats 0 and 1 of each 4-element group; the select zeroes
; lanes where %mask is not ordered-equal to 0.0.
; Expected assembly: vxorps + vcmpeqps set %k1; vunpcklps {%k1} {z} reads the
; second source from memory.
685define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
686; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
687; CHECK:       # %bb.0:
688; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
689; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
690; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
691; CHECK-NEXT:    retq
692  %vec2 = load <16 x float>, <16 x float>* %vec2p
693  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
694  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
695  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
696  ret <16 x float> %res
697}
698
; Merge-masked low unpack of %vec1 with the <16 x float> loaded from %vec2p:
; interleave floats 0 and 1 of each 4-element group, keeping the %vec3 lane
; wherever the %mask element is not ordered-equal to 0.0.
; Expected assembly: masked vunpcklps with a memory source into zmm1, then a
; vmovaps of zmm1 into zmm0.
699define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
700; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
701; CHECK:       # %bb.0:
702; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
703; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
704; CHECK-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
705; CHECK-NEXT:    vmovaps %zmm1, %zmm0
706; CHECK-NEXT:    retq
707  %vec2 = load <16 x float>, <16 x float>* %vec2p
708  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
709  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
710  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
711  ret <16 x float> %res
712}
713
; Zero-masked low unpack of %vec1 with the <16 x float> loaded from %vec2p:
; interleave floats 0 and 1 of each 4-element group, zeroing every lane whose
; %mask element is not ordered-equal to 0.0.
; Expected assembly: a single vunpcklps {%k1} {z} with a memory source operand.
714define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
715; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
716; CHECK:       # %bb.0:
717; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
718; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
719; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
720; CHECK-NEXT:    retq
721  %vec2 = load <16 x float>, <16 x float>* %vec2p
722  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
723  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
724  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
725  ret <16 x float> %res
726}
727
; Unmasked low unpack of %vec1 with the <16 x float> loaded from %vec2p: floats
; 0 and 1 of each 4-element group of both inputs are interleaved.
; Expected assembly: one vunpcklps taking its second source from memory.
728define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
729; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask3:
730; CHECK:       # %bb.0:
731; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
732; CHECK-NEXT:    retq
733  %vec2 = load <16 x float>, <16 x float>* %vec2p
734  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
735  ret <16 x float> %res
736}
; Merge-masked low unpack of %vec1 with the <16 x float> loaded from %vec2p. The
; interleave of floats 0 and 1 from each 4-element group is selected where %mask
; is ordered-equal to 0.0; other lanes come from %vec3.
; Expected assembly: vxorps + vcmpeqps set %k1; vunpcklps {%k1} with a memory
; source writes zmm1, and vmovaps moves zmm1 into zmm0.
737define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
738; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
739; CHECK:       # %bb.0:
740; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
741; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
742; CHECK-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
743; CHECK-NEXT:    vmovaps %zmm1, %zmm0
744; CHECK-NEXT:    retq
745  %vec2 = load <16 x float>, <16 x float>* %vec2p
746  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
747  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
748  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
749  ret <16 x float> %res
750}
751
; Zero-masked low unpack of %vec1 with the <16 x float> loaded from %vec2p. The
; interleave of floats 0 and 1 from each 4-element group is selected where %mask
; is ordered-equal to 0.0; other lanes are zero.
; Expected assembly: vunpcklps {%k1} {z} with a memory source, after vxorps and
; vcmpeqps compute %k1.
752define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
753; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
754; CHECK:       # %bb.0:
755; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
756; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
757; CHECK-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
758; CHECK-NEXT:    retq
759  %vec2 = load <16 x float>, <16 x float>* %vec2p
760  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
761  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
762  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
763  ret <16 x float> %res
764}
765
; Unmasked low unpack of two <2 x double> registers: the result is element 0 of
; %vec1 followed by element 0 of %vec2 (shuffle indices 0 and 2).
; Expected assembly: this unmasked register form is emitted as vmovlhps rather
; than vunpcklpd.
766define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) {
767; CHECK-LABEL: test_2xdouble_unpack_low_mask0:
768; CHECK:       # %bb.0:
769; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
770; CHECK-NEXT:    retq
771  %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
772  ret <2 x double> %res
773}
; Merge-masked low unpack of two <2 x double> registers: the shuffle yields
; element 0 of %vec1 then element 0 of %vec2; lanes where %mask is not
; ordered-equal to 0.0 take the matching lane of %vec3.
; Expected assembly: vxorpd + vcmpeqpd set %k1; vunpcklpd {%k1} writes xmm2,
; and vmovapd copies xmm2 to xmm0.
774define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
775; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0:
776; CHECK:       # %bb.0:
777; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
778; CHECK-NEXT:    vcmpeqpd %xmm4, %xmm3, %k1
779; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
780; CHECK-NEXT:    vmovapd %xmm2, %xmm0
781; CHECK-NEXT:    retq
782  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
783  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
784  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
785  ret <2 x double> %res
786}
787
; Zero-masked low unpack of two <2 x double> registers: the shuffle yields
; element 0 of %vec1 then element 0 of %vec2; lanes where %mask is not
; ordered-equal to 0.0 are zeroed.
; Expected assembly: vxorpd + vcmpeqpd set %k1; one vunpcklpd {%k1} {z}.
788define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
789; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
790; CHECK:       # %bb.0:
791; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
792; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
793; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
794; CHECK-NEXT:    retq
795  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
796  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
797  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
798  ret <2 x double> %res
799}
; Merge-masked low unpack of two <2 x double> registers. The shuffle picks
; element 0 of %vec1 and element 0 of %vec2; the select substitutes the %vec3
; lane wherever %mask is not ordered-equal to 0.0.
; Expected assembly: vunpcklpd under {%k1} into xmm2, then vmovapd xmm2 -> xmm0.
800define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
801; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1:
802; CHECK:       # %bb.0:
803; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
804; CHECK-NEXT:    vcmpeqpd %xmm4, %xmm3, %k1
805; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
806; CHECK-NEXT:    vmovapd %xmm2, %xmm0
807; CHECK-NEXT:    retq
808  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
809  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
810  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
811  ret <2 x double> %res
812}
813
; Zero-masked low unpack of two <2 x double> registers. The shuffle picks
; element 0 of %vec1 and element 0 of %vec2; the select zeroes every lane whose
; %mask element is not ordered-equal to 0.0.
; Expected assembly: a single vunpcklpd with {%k1} {z} after vxorpd + vcmpeqpd.
814define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
815; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
816; CHECK:       # %bb.0:
817; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
818; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
819; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
820; CHECK-NEXT:    retq
821  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
822  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
823  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
824  ret <2 x double> %res
825}
; Unmasked low unpack of a <2 x double> register with a vector loaded from
; %vec2p: the result is element 0 of %vec1 followed by element 0 of the load.
; Expected assembly: one vunpcklpd whose second source is a memory operand.
826define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
827; CHECK-LABEL: test_2xdouble_unpack_low_mem_mask0:
828; CHECK:       # %bb.0:
829; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
830; CHECK-NEXT:    retq
831  %vec2 = load <2 x double>, <2 x double>* %vec2p
832  %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
833  ret <2 x double> %res
834}
; Merge-masked low unpack of a <2 x double> register with a vector loaded from
; %vec2p: element 0 of %vec1 then element 0 of the load, with the %vec3 lane
; used wherever %mask is not ordered-equal to 0.0.
; Expected assembly: vunpcklpd {%k1} with a memory source writes xmm1, then
; vmovapd copies xmm1 to xmm0.
835define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
836; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
837; CHECK:       # %bb.0:
838; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
839; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
840; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
841; CHECK-NEXT:    vmovapd %xmm1, %xmm0
842; CHECK-NEXT:    retq
843  %vec2 = load <2 x double>, <2 x double>* %vec2p
844  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
845  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
846  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
847  ret <2 x double> %res
848}
849
; Zero-masked low unpack of a <2 x double> register with a vector loaded from
; %vec2p: element 0 of %vec1 then element 0 of the load, with lanes zeroed
; wherever %mask is not ordered-equal to 0.0.
; Expected assembly: one vunpcklpd {%k1} {z} with a memory source operand.
850define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
851; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
852; CHECK:       # %bb.0:
853; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
854; CHECK-NEXT:    vcmpeqpd %xmm2, %xmm1, %k1
855; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
856; CHECK-NEXT:    retq
857  %vec2 = load <2 x double>, <2 x double>* %vec2p
858  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
859  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
860  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
861  ret <2 x double> %res
862}
863
; Merge-masked low unpack, <2 x double>, second input loaded from %vec2p. The
; shuffle picks element 0 of each input; the select falls back to %vec3 in lanes
; where %mask is not ordered-equal to 0.0.
; Expected assembly: masked vunpcklpd with a mem[...] source into xmm1, then
; vmovapd xmm1 -> xmm0.
864define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
865; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
866; CHECK:       # %bb.0:
867; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
868; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
869; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
870; CHECK-NEXT:    vmovapd %xmm1, %xmm0
871; CHECK-NEXT:    retq
872  %vec2 = load <2 x double>, <2 x double>* %vec2p
873  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
874  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
875  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
876  ret <2 x double> %res
877}
878
; Zero-masked low unpack, <2 x double>, second input loaded from %vec2p. The
; shuffle picks element 0 of each input; the select zeroes lanes where %mask is
; not ordered-equal to 0.0.
; Expected assembly: vxorpd + vcmpeqpd set %k1; vunpcklpd {%k1} {z} reads its
; second source from memory.
879define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
880; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
881; CHECK:       # %bb.0:
882; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
883; CHECK-NEXT:    vcmpeqpd %xmm2, %xmm1, %k1
884; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
885; CHECK-NEXT:    retq
886  %vec2 = load <2 x double>, <2 x double>* %vec2p
887  %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
888  %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
889  %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
890  ret <2 x double> %res
891}
892
; Unmasked low unpack of two <4 x double> registers: the result is
; %vec1[0], %vec2[0], %vec1[2], %vec2[2] (shuffle indices 0, 4, 2, 6).
; Expected assembly: a single vunpcklpd on ymm0/ymm1.
893define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) {
894; CHECK-LABEL: test_4xdouble_unpack_low_mask0:
895; CHECK:       # %bb.0:
896; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
897; CHECK-NEXT:    retq
898  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
899  ret <4 x double> %res
900}
; Merge-masked low unpack of two <4 x double> registers: the shuffle yields
; %vec1[0], %vec2[0], %vec1[2], %vec2[2]; lanes where %mask is not ordered-equal
; to 0.0 take the matching lane of %vec3.
; Expected assembly: vxorpd + vcmpeqpd set %k1; vunpcklpd {%k1} writes ymm2,
; and vmovapd copies ymm2 to ymm0.
901define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
902; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0:
903; CHECK:       # %bb.0:
904; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
905; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
906; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
907; CHECK-NEXT:    vmovapd %ymm2, %ymm0
908; CHECK-NEXT:    retq
909  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
910  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
911  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
912  ret <4 x double> %res
913}
914
; Zero-masked low unpack of two <4 x double> registers: the shuffle yields
; %vec1[0], %vec2[0], %vec1[2], %vec2[2]; lanes where %mask is not ordered-equal
; to 0.0 are zeroed.
; Expected assembly: vxorpd + vcmpeqpd set %k1; one vunpcklpd {%k1} {z} on ymm.
915define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
916; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
917; CHECK:       # %bb.0:
918; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
919; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
920; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
921; CHECK-NEXT:    retq
922  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
923  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
924  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
925  ret <4 x double> %res
926}
; Merge-masked low unpack of two <4 x double> registers. Shuffle indices
; 0, 4, 2, 6 interleave the even elements of %vec1 and %vec2; the select
; substitutes the %vec3 lane wherever %mask is not ordered-equal to 0.0.
; Expected assembly: vunpcklpd under {%k1} into ymm2, then vmovapd ymm2 -> ymm0.
927define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
928; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1:
929; CHECK:       # %bb.0:
930; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
931; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
932; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
933; CHECK-NEXT:    vmovapd %ymm2, %ymm0
934; CHECK-NEXT:    retq
935  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
936  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
937  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
938  ret <4 x double> %res
939}
940
; Zero-masked low unpack of two <4 x double> registers. Shuffle indices
; 0, 4, 2, 6 interleave the even elements of %vec1 and %vec2; the select zeroes
; every lane whose %mask element is not ordered-equal to 0.0.
; Expected assembly: a single vunpcklpd with {%k1} {z} after vxorpd + vcmpeqpd.
941define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
942; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
943; CHECK:       # %bb.0:
944; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
945; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
946; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
947; CHECK-NEXT:    retq
948  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
949  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
950  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
951  ret <4 x double> %res
952}
; Merge-masked low unpack of two <4 x double> registers: interleave the even
; elements of %vec1 and %vec2 (indices 0, 4, 2, 6), keeping the %vec3 lane
; wherever the %mask element is not ordered-equal to 0.0.
; Expected assembly: vxorpd + vcmpeqpd set %k1; vunpcklpd {%k1} writes ymm2,
; which vmovapd then copies into ymm0.
953define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
954; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2:
955; CHECK:       # %bb.0:
956; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
957; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
958; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
959; CHECK-NEXT:    vmovapd %ymm2, %ymm0
960; CHECK-NEXT:    retq
961  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
962  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
963  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
964  ret <4 x double> %res
965}
966
; Zero-masked low unpack of two <4 x double> registers: interleave the even
; elements of %vec1 and %vec2 (indices 0, 4, 2, 6), zeroing every lane whose
; %mask element is not ordered-equal to 0.0.
; Expected assembly: vxorpd + vcmpeqpd set %k1; vunpcklpd {%k1} {z} writes ymm0.
967define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
968; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
969; CHECK:       # %bb.0:
970; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
971; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
972; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
973; CHECK-NEXT:    retq
974  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
975  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
976  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
977  ret <4 x double> %res
978}
; Unmasked low unpack of two <4 x double> registers: shuffle indices 0, 4, 2, 6
; interleave the even elements of %vec1 and %vec2, with no select afterwards.
; Expected assembly: one vunpcklpd on ymm registers.
979define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) {
980; CHECK-LABEL: test_4xdouble_unpack_low_mask3:
981; CHECK:       # %bb.0:
982; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
983; CHECK-NEXT:    retq
984  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
985  ret <4 x double> %res
986}
; Merge-masked low unpack of two <4 x double> registers. The interleave of the
; even elements of %vec1/%vec2 is selected where %mask is ordered-equal to 0.0;
; other lanes come from %vec3.
; Expected assembly: masked vunpcklpd into ymm2, followed by vmovapd to ymm0.
987define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
988; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3:
989; CHECK:       # %bb.0:
990; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
991; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
992; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
993; CHECK-NEXT:    vmovapd %ymm2, %ymm0
994; CHECK-NEXT:    retq
995  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
996  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
997  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
998  ret <4 x double> %res
999}
1000
; Zero-masked low unpack of two <4 x double> registers. The interleave of the
; even elements of %vec1/%vec2 is selected where %mask is ordered-equal to 0.0;
; other lanes are zero.
; Expected assembly: one vunpcklpd {%k1} {z}, with %k1 produced by vcmpeqpd.
1001define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
1002; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
1003; CHECK:       # %bb.0:
1004; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1005; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
1006; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1007; CHECK-NEXT:    retq
1008  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1009  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1010  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1011  ret <4 x double> %res
1012}
; Unmasked low unpack of a <4 x double> register with a vector loaded from
; %vec2p: the result is %vec1[0], %vec2[0], %vec1[2], %vec2[2].
; Expected assembly: one vunpcklpd whose second source is a memory operand.
1013define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
1014; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask0:
1015; CHECK:       # %bb.0:
1016; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
1017; CHECK-NEXT:    retq
1018  %vec2 = load <4 x double>, <4 x double>* %vec2p
1019  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1020  ret <4 x double> %res
1021}
; Merge-masked low unpack of a <4 x double> register with a vector loaded from
; %vec2p: even elements of both inputs are interleaved (indices 0, 4, 2, 6), and
; lanes where %mask is not ordered-equal to 0.0 take the matching %vec3 lane.
; Expected assembly: vunpcklpd {%k1} with a memory source writes ymm1, then
; vmovapd copies ymm1 to ymm0.
1022define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1023; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
1024; CHECK:       # %bb.0:
1025; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1026; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
1027; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1028; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1029; CHECK-NEXT:    retq
1030  %vec2 = load <4 x double>, <4 x double>* %vec2p
1031  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1032  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1033  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1034  ret <4 x double> %res
1035}
1036
; Zero-masked low unpack of a <4 x double> register with a vector loaded from
; %vec2p: even elements of both inputs are interleaved (indices 0, 4, 2, 6), and
; lanes where %mask is not ordered-equal to 0.0 are zeroed.
; Expected assembly: one vunpcklpd {%k1} {z} with a memory source operand.
1037define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
1038; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
1039; CHECK:       # %bb.0:
1040; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1041; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
1042; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1043; CHECK-NEXT:    retq
1044  %vec2 = load <4 x double>, <4 x double>* %vec2p
1045  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1046  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1047  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1048  ret <4 x double> %res
1049}
1050
; Merge-masked low unpack, <4 x double>, second input loaded from %vec2p. The
; shuffle interleaves the even elements of both inputs; the select falls back to
; %vec3 in lanes where %mask is not ordered-equal to 0.0.
; Expected assembly: masked vunpcklpd with a mem[...] source into ymm1, then
; vmovapd ymm1 -> ymm0.
1051define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1052; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
1053; CHECK:       # %bb.0:
1054; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1055; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
1056; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1057; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1058; CHECK-NEXT:    retq
1059  %vec2 = load <4 x double>, <4 x double>* %vec2p
1060  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1061  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1062  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1063  ret <4 x double> %res
1064}
1065
; Zero-masked low unpack, <4 x double>, second input loaded from %vec2p. The
; shuffle interleaves the even elements of both inputs; the select zeroes lanes
; where %mask is not ordered-equal to 0.0.
; Expected assembly: vxorpd + vcmpeqpd set %k1; vunpcklpd {%k1} {z} reads its
; second source from memory.
1066define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
1067; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
1068; CHECK:       # %bb.0:
1069; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1070; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
1071; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1072; CHECK-NEXT:    retq
1073  %vec2 = load <4 x double>, <4 x double>* %vec2p
1074  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1075  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1076  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1077  ret <4 x double> %res
1078}
1079
; Merge-masked low unpack of %vec1 with the <4 x double> loaded from %vec2p:
; interleave the even elements of both inputs, keeping the %vec3 lane wherever
; the %mask element is not ordered-equal to 0.0.
; Expected assembly: vunpcklpd {%k1} with a memory source into ymm1, then a
; vmovapd of ymm1 into ymm0.
1080define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1081; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
1082; CHECK:       # %bb.0:
1083; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1084; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
1085; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1086; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1087; CHECK-NEXT:    retq
1088  %vec2 = load <4 x double>, <4 x double>* %vec2p
1089  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1090  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1091  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1092  ret <4 x double> %res
1093}
1094
; Zero-masked low unpack of %vec1 with the <4 x double> loaded from %vec2p:
; interleave the even elements of both inputs, zeroing every lane whose %mask
; element is not ordered-equal to 0.0.
; Expected assembly: a single vunpcklpd {%k1} {z} with a memory source operand.
1095define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
1096; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
1097; CHECK:       # %bb.0:
1098; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1099; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
1100; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1101; CHECK-NEXT:    retq
1102  %vec2 = load <4 x double>, <4 x double>* %vec2p
1103  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1104  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1105  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1106  ret <4 x double> %res
1107}
1108
; Unmasked low unpack of <4 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,4,2,6> interleaves elements 0 and 2 of %vec1 and %vec2; the
; expected assembly is a single vunpcklpd whose second source is a memory
; operand (no separate load instruction).
1109define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
1110; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask3:
1111; CHECK:       # %bb.0:
1112; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
1113; CHECK-NEXT:    retq
1114  %vec2 = load <4 x double>, <4 x double>* %vec2p
1115  %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1116  ret <4 x double> %res
1117}
; Merge-masked low unpack of <4 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,4,2,6> interleaves elements 0 and 2 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, a {%k1}-masked
; vunpcklpd with a memory source writing ymm1, then vmovapd of ymm1 into ymm0.
1118define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1119; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
1120; CHECK:       # %bb.0:
1121; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1122; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
1123; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1124; CHECK-NEXT:    vmovapd %ymm1, %ymm0
1125; CHECK-NEXT:    retq
1126  %vec2 = load <4 x double>, <4 x double>* %vec2p
1127  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1128  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1129  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1130  ret <4 x double> %res
1131}
1132
; Zero-masked low unpack of <4 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,4,2,6> interleaves elements 0 and 2 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, then one vunpcklpd
; with a memory source and {%k1} {z}.
1133define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
1134; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
1135; CHECK:       # %bb.0:
1136; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1137; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
1138; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1139; CHECK-NEXT:    retq
1140  %vec2 = load <4 x double>, <4 x double>* %vec2p
1141  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1142  %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1143  %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1144  ret <4 x double> %res
1145}
1146
; Unmasked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements (0, 2, 4, 6) of
; %vec1 and %vec2; the expected assembly is a single zmm vunpcklpd.
1147define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) {
1148; CHECK-LABEL: test_8xdouble_unpack_low_mask0:
1149; CHECK:       # %bb.0:
1150; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1151; CHECK-NEXT:    retq
1152  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1153  ret <8 x double> %res
1154}
; Merge-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, a {%k1}-masked
; vunpcklpd writing zmm2, then vmovapd of zmm2 into zmm0.
1155define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1156; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0:
1157; CHECK:       # %bb.0:
1158; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
1159; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
1160; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1161; CHECK-NEXT:    vmovapd %zmm2, %zmm0
1162; CHECK-NEXT:    retq
1163  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1164  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1165  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1166  ret <8 x double> %res
1167}
1168
; Zero-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, then one vunpcklpd
; using {%k1} {z}.
1169define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1170; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
1171; CHECK:       # %bb.0:
1172; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1173; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1174; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1175; CHECK-NEXT:    retq
1176  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1177  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1178  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1179  ret <8 x double> %res
1180}
; Merge-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, a {%k1}-masked
; vunpcklpd writing zmm2, then vmovapd of zmm2 into zmm0.
1181define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1182; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1:
1183; CHECK:       # %bb.0:
1184; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
1185; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
1186; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1187; CHECK-NEXT:    vmovapd %zmm2, %zmm0
1188; CHECK-NEXT:    retq
1189  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1190  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1191  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1192  ret <8 x double> %res
1193}
1194
; Zero-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, then one vunpcklpd
; using {%k1} {z}.
1195define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1196; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
1197; CHECK:       # %bb.0:
1198; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1199; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1200; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1201; CHECK-NEXT:    retq
1202  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1203  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1204  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1205  ret <8 x double> %res
1206}
; Merge-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, a {%k1}-masked
; vunpcklpd writing zmm2, then vmovapd of zmm2 into zmm0.
1207define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1208; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2:
1209; CHECK:       # %bb.0:
1210; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
1211; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
1212; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1213; CHECK-NEXT:    vmovapd %zmm2, %zmm0
1214; CHECK-NEXT:    retq
1215  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1216  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1217  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1218  ret <8 x double> %res
1219}
1220
; Zero-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, then one vunpcklpd
; using {%k1} {z}.
1221define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1222; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
1223; CHECK:       # %bb.0:
1224; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1225; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1226; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1227; CHECK-NEXT:    retq
1228  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1229  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1230  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1231  ret <8 x double> %res
1232}
; Unmasked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements (0, 2, 4, 6) of
; %vec1 and %vec2; the expected assembly is a single zmm vunpcklpd.
1233define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) {
1234; CHECK-LABEL: test_8xdouble_unpack_low_mask3:
1235; CHECK:       # %bb.0:
1236; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1237; CHECK-NEXT:    retq
1238  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1239  ret <8 x double> %res
1240}
; Merge-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, a {%k1}-masked
; vunpcklpd writing zmm2, then vmovapd of zmm2 into zmm0.
1241define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1242; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3:
1243; CHECK:       # %bb.0:
1244; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
1245; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
1246; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1247; CHECK-NEXT:    vmovapd %zmm2, %zmm0
1248; CHECK-NEXT:    retq
1249  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1250  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1251  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1252  ret <8 x double> %res
1253}
1254
; Zero-masked low unpack of two <8 x double> register operands. Shuffle mask
; <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqpd into %k1, then one vunpcklpd
; using {%k1} {z}.
1255define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1256; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
1257; CHECK:       # %bb.0:
1258; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1259; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1260; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1261; CHECK-NEXT:    retq
1262  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1263  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1264  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1265  ret <8 x double> %res
1266}
; Unmasked low unpack of <8 x double> with %vec2 loaded from %vec2p. Shuffle
; mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and
; %vec2; the expected assembly is a single zmm vunpcklpd whose second source is
; a memory operand (no separate load instruction).
1267define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
1268; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask0:
1269; CHECK:       # %bb.0:
1270; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1271; CHECK-NEXT:    retq
1272  %vec2 = load <8 x double>, <8 x double>* %vec2p
1273  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1274  ret <8 x double> %res
1275}
; Merge-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, a
; {%k1}-masked vunpcklpd with a memory source writing zmm1, then vmovapd of
; zmm1 into zmm0.
1276define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1277; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
1278; CHECK:       # %bb.0:
1279; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1280; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1281; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1282; CHECK-NEXT:    vmovapd %zmm1, %zmm0
1283; CHECK-NEXT:    retq
1284  %vec2 = load <8 x double>, <8 x double>* %vec2p
1285  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1286  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1287  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1288  ret <8 x double> %res
1289}
1290
; Zero-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, then
; one vunpcklpd with a memory source and {%k1} {z}.
1291define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
1292; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
1293; CHECK:       # %bb.0:
1294; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1295; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
1296; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1297; CHECK-NEXT:    retq
1298  %vec2 = load <8 x double>, <8 x double>* %vec2p
1299  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1300  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1301  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1302  ret <8 x double> %res
1303}
1304
; Merge-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, a
; {%k1}-masked vunpcklpd with a memory source writing zmm1, then vmovapd of
; zmm1 into zmm0.
1305define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1306; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
1307; CHECK:       # %bb.0:
1308; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1309; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1310; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1311; CHECK-NEXT:    vmovapd %zmm1, %zmm0
1312; CHECK-NEXT:    retq
1313  %vec2 = load <8 x double>, <8 x double>* %vec2p
1314  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1315  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1316  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1317  ret <8 x double> %res
1318}
1319
; Zero-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, then
; one vunpcklpd with a memory source and {%k1} {z}.
1320define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
1321; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
1322; CHECK:       # %bb.0:
1323; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1324; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
1325; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1326; CHECK-NEXT:    retq
1327  %vec2 = load <8 x double>, <8 x double>* %vec2p
1328  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1329  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1330  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1331  ret <8 x double> %res
1332}
1333
; Merge-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, a
; {%k1}-masked vunpcklpd with a memory source writing zmm1, then vmovapd of
; zmm1 into zmm0.
1334define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1335; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
1336; CHECK:       # %bb.0:
1337; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1338; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1339; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1340; CHECK-NEXT:    vmovapd %zmm1, %zmm0
1341; CHECK-NEXT:    retq
1342  %vec2 = load <8 x double>, <8 x double>* %vec2p
1343  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1344  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1345  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1346  ret <8 x double> %res
1347}
1348
; Zero-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, then
; one vunpcklpd with a memory source and {%k1} {z}.
1349define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
1350; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
1351; CHECK:       # %bb.0:
1352; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1353; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
1354; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1355; CHECK-NEXT:    retq
1356  %vec2 = load <8 x double>, <8 x double>* %vec2p
1357  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1358  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1359  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1360  ret <8 x double> %res
1361}
1362
; Unmasked low unpack of <8 x double> with %vec2 loaded from %vec2p. Shuffle
; mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of %vec1 and
; %vec2; the expected assembly is a single zmm vunpcklpd whose second source is
; a memory operand (no separate load instruction).
1363define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
1364; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask3:
1365; CHECK:       # %bb.0:
1366; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1367; CHECK-NEXT:    retq
1368  %vec2 = load <8 x double>, <8 x double>* %vec2p
1369  %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1370  ret <8 x double> %res
1371}
; Merge-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, a
; {%k1}-masked vunpcklpd with a memory source writing zmm1, then vmovapd of
; zmm1 into zmm0.
1372define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1373; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
1374; CHECK:       # %bb.0:
1375; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
1376; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
1377; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1378; CHECK-NEXT:    vmovapd %zmm1, %zmm0
1379; CHECK-NEXT:    retq
1380  %vec2 = load <8 x double>, <8 x double>* %vec2p
1381  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1382  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1383  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1384  ret <8 x double> %res
1385}
1386
; Zero-masked low unpack of <8 x double> with %vec2 loaded from %vec2p.
; Shuffle mask <0,8,2,10,4,12,6,14> interleaves the even-indexed elements of
; %vec1 and %vec2. Expected assembly: zero a register, vcmpeqpd into %k1, then
; one vunpcklpd with a memory source and {%k1} {z}.
1387define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
1388; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
1389; CHECK:       # %bb.0:
1390; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1391; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
1392; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1393; CHECK-NEXT:    retq
1394  %vec2 = load <8 x double>, <8 x double>* %vec2p
1395  %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1396  %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1397  %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1398  ret <8 x double> %res
1399}
1400
; Unmasked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2; the expected
; assembly is a single xmm vunpckhps.
1401define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) {
1402; CHECK-LABEL: test_4xfloat_unpack_high_mask0:
1403; CHECK:       # %bb.0:
1404; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1405; CHECK-NEXT:    retq
1406  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1407  ret <4 x float> %res
1408}
; Merge-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, a {%k1}-masked vunpckhps writing xmm2,
; then vmovaps of xmm2 into xmm0.
1409define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1410; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask0:
1411; CHECK:       # %bb.0:
1412; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1413; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
1414; CHECK-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1415; CHECK-NEXT:    vmovaps %xmm2, %xmm0
1416; CHECK-NEXT:    retq
1417  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1418  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1419  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1420  ret <4 x float> %res
1421}
1422
; Zero-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, then one vunpckhps using {%k1} {z}.
1423define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1424; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
1425; CHECK:       # %bb.0:
1426; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1427; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1428; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1429; CHECK-NEXT:    retq
1430  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1431  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1432  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1433  ret <4 x float> %res
1434}
; Merge-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, a {%k1}-masked vunpckhps writing xmm2,
; then vmovaps of xmm2 into xmm0.
1435define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1436; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1:
1437; CHECK:       # %bb.0:
1438; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1439; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
1440; CHECK-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1441; CHECK-NEXT:    vmovaps %xmm2, %xmm0
1442; CHECK-NEXT:    retq
1443  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1444  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1445  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1446  ret <4 x float> %res
1447}
1448
; Zero-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, then one vunpckhps using {%k1} {z}.
1449define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1450; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
1451; CHECK:       # %bb.0:
1452; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1453; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1454; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1455; CHECK-NEXT:    retq
1456  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1457  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1458  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1459  ret <4 x float> %res
1460}
; Merge-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, a {%k1}-masked vunpckhps writing xmm2,
; then vmovaps of xmm2 into xmm0.
1461define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1462; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2:
1463; CHECK:       # %bb.0:
1464; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1465; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
1466; CHECK-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1467; CHECK-NEXT:    vmovaps %xmm2, %xmm0
1468; CHECK-NEXT:    retq
1469  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1470  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1471  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1472  ret <4 x float> %res
1473}
1474
; Zero-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, then one vunpckhps using {%k1} {z}.
1475define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1476; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
1477; CHECK:       # %bb.0:
1478; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1479; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1480; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1481; CHECK-NEXT:    retq
1482  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1483  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1484  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1485  ret <4 x float> %res
1486}
; Unmasked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2; the expected
; assembly is a single xmm vunpckhps.
1487define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) {
1488; CHECK-LABEL: test_4xfloat_unpack_high_mask3:
1489; CHECK:       # %bb.0:
1490; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1491; CHECK-NEXT:    retq
1492  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1493  ret <4 x float> %res
1494}
; Merge-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, a {%k1}-masked vunpckhps writing xmm2,
; then vmovaps of xmm2 into xmm0.
1495define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1496; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3:
1497; CHECK:       # %bb.0:
1498; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1499; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
1500; CHECK-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1501; CHECK-NEXT:    vmovaps %xmm2, %xmm0
1502; CHECK-NEXT:    retq
1503  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1504  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1505  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1506  ret <4 x float> %res
1507}
1508
; Zero-masked high unpack of two <4 x float> register operands. Shuffle mask
; <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2. Expected assembly:
; zero a register, vcmpeqps into %k1, then one vunpckhps using {%k1} {z}.
1509define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1510; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
1511; CHECK:       # %bb.0:
1512; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1513; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1514; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1515; CHECK-NEXT:    retq
1516  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1517  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1518  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1519  ret <4 x float> %res
1520}
; Unmasked high unpack of <4 x float> with %vec2 loaded from %vec2p. Shuffle
; mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2; the expected
; assembly is a single xmm vunpckhps whose second source is a memory operand
; (no separate load instruction).
1521define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
1522; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask0:
1523; CHECK:       # %bb.0:
1524; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
1525; CHECK-NEXT:    retq
1526  %vec2 = load <4 x float>, <4 x float>* %vec2p
1527  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1528  ret <4 x float> %res
1529}
; Merge-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, a {%k1}-masked
; vunpckhps with a memory source writing xmm1, then vmovaps of xmm1 into xmm0.
1530define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1531; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
1532; CHECK:       # %bb.0:
1533; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1534; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1535; CHECK-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1536; CHECK-NEXT:    vmovaps %xmm1, %xmm0
1537; CHECK-NEXT:    retq
1538  %vec2 = load <4 x float>, <4 x float>* %vec2p
1539  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1540  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1541  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1542  ret <4 x float> %res
1543}
1544
; Zero-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, then one vunpckhps
; with a memory source and {%k1} {z}.
1545define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
1546; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
1547; CHECK:       # %bb.0:
1548; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1549; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
1550; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1551; CHECK-NEXT:    retq
1552  %vec2 = load <4 x float>, <4 x float>* %vec2p
1553  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1554  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1555  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1556  ret <4 x float> %res
1557}
1558
; Merge-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, a {%k1}-masked
; vunpckhps with a memory source writing xmm1, then vmovaps of xmm1 into xmm0.
1559define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1560; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
1561; CHECK:       # %bb.0:
1562; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1563; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1564; CHECK-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1565; CHECK-NEXT:    vmovaps %xmm1, %xmm0
1566; CHECK-NEXT:    retq
1567  %vec2 = load <4 x float>, <4 x float>* %vec2p
1568  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1569  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1570  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1571  ret <4 x float> %res
1572}
1573
; Zero-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, then one vunpckhps
; with a memory source and {%k1} {z}.
1574define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
1575; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
1576; CHECK:       # %bb.0:
1577; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1578; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
1579; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1580; CHECK-NEXT:    retq
1581  %vec2 = load <4 x float>, <4 x float>* %vec2p
1582  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1583  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1584  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1585  ret <4 x float> %res
1586}
1587
; Merge-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, a {%k1}-masked
; vunpckhps with a memory source writing xmm1, then vmovaps of xmm1 into xmm0.
1588define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1589; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
1590; CHECK:       # %bb.0:
1591; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1592; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1593; CHECK-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1594; CHECK-NEXT:    vmovaps %xmm1, %xmm0
1595; CHECK-NEXT:    retq
1596  %vec2 = load <4 x float>, <4 x float>* %vec2p
1597  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes keep the corresponding element of %vec3.
1598  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1599  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1600  ret <4 x float> %res
1601}
1602
; Zero-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, then one vunpckhps
; with a memory source and {%k1} {z}.
1603define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
1604; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
1605; CHECK:       # %bb.0:
1606; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1607; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
1608; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1609; CHECK-NEXT:    retq
1610  %vec2 = load <4 x float>, <4 x float>* %vec2p
1611  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; fcmp oeq is true only for lanes of %mask equal to 0.0; those lanes take
; %shuf and the remaining lanes become zero.
1612  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1613  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1614  ret <4 x float> %res
1615}
1616
; Unmasked high unpack of <4 x float> with %vec2 loaded from %vec2p. Shuffle
; mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2; the expected
; assembly is a single xmm vunpckhps whose second source is a memory operand
; (no separate load instruction).
1617define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
1618; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask3:
1619; CHECK:       # %bb.0:
1620; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
1621; CHECK-NEXT:    retq
1622  %vec2 = load <4 x float>, <4 x float>* %vec2p
1623  %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1624  ret <4 x float> %res
1625}
; Merge-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, a {%k1}-masked
; vunpckhps with a memory source writing xmm1, then vmovaps of xmm1 into xmm0.
1626define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1627; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
1628; CHECK:       # %bb.0:
1629; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1630; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
1631; CHECK-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1632; CHECK-NEXT:    vmovaps %xmm1, %xmm0
1633; CHECK-NEXT:    retq
1634  %vec2 = load <4 x float>, <4 x float>* %vec2p
1635  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane is taken from %vec3.
1636  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1637  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1638  ret <4 x float> %res
1639}
1640
; Zero-masked high unpack of <4 x float> with %vec2 loaded from %vec2p.
; Shuffle mask <2,6,3,7> interleaves elements 2 and 3 of %vec1 and %vec2.
; Expected assembly: zero a register, vcmpeqps into %k1, then one vunpckhps
; with a memory source and {%k1} {z}.
1641define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
1642; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
1643; CHECK:       # %bb.0:
1644; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1645; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
1646; CHECK-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1647; CHECK-NEXT:    retq
1648  %vec2 = load <4 x float>, <4 x float>* %vec2p
1649  %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Lanes of %mask that compare ordered-equal to 0.0 select %shuf; every other
; lane of the result is zero.
1650  %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1651  %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1652  ret <4 x float> %res
1653}
1654
; Unmasked high unpack of two <8 x float> register operands. Shuffle mask
; <2,10,3,11,6,14,7,15> interleaves elements 2, 3, 6 and 7 of %vec1 and %vec2;
; the expected assembly is a single ymm vunpckhps.
1655define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) {
1656; CHECK-LABEL: test_8xfloat_unpack_high_mask0:
1657; CHECK:       # %bb.0:
1658; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1659; CHECK-NEXT:    retq
1660  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1661  ret <8 x float> %res
1662}
; Merge-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2 (mask <2,10,3,11,6,14,7,15>).
; %cmp is true in lanes where %mask is ordered-equal to zero. The check lines expect
; vxorps + vcmpeqps into %k1, a vunpckhps writing ymm2 under {%k1}, and a vmovaps of
; ymm2 into ymm0.
1663define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1664; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0:
1665; CHECK:       # %bb.0:
1666; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1667; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
1668; CHECK-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1669; CHECK-NEXT:    vmovaps %ymm2, %ymm0
1670; CHECK-NEXT:    retq
1671  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1672  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1673  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1674  ret <8 x float> %res
1675}
1676
; Zero-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2. Lanes where %mask is
; ordered-equal to zero take %shuf; all other lanes are zero. The check lines expect
; vxorps + vcmpeqps into %k1 and a single vunpckhps writing ymm0 under {%k1} {z}.
1677define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1678; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
1679; CHECK:       # %bb.0:
1680; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1681; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1682; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1683; CHECK-NEXT:    retq
1684  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1685  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1686  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1687  ret <8 x float> %res
1688}
; Merge-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2 (mask <2,10,3,11,6,14,7,15>).
; %cmp is true in lanes where %mask is ordered-equal to zero. The check lines expect
; vxorps + vcmpeqps into %k1, a vunpckhps writing ymm2 under {%k1}, and a vmovaps of
; ymm2 into ymm0.
1689define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1690; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1:
1691; CHECK:       # %bb.0:
1692; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1693; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
1694; CHECK-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1695; CHECK-NEXT:    vmovaps %ymm2, %ymm0
1696; CHECK-NEXT:    retq
1697  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1698  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1699  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1700  ret <8 x float> %res
1701}
1702
; Zero-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2. Lanes where %mask is
; ordered-equal to zero take %shuf; all other lanes are zero. The check lines expect
; vxorps + vcmpeqps into %k1 and a single vunpckhps writing ymm0 under {%k1} {z}.
1703define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1704; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
1705; CHECK:       # %bb.0:
1706; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1707; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1708; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1709; CHECK-NEXT:    retq
1710  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1711  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1712  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1713  ret <8 x float> %res
1714}
; Merge-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2 (mask <2,10,3,11,6,14,7,15>).
; %cmp is true in lanes where %mask is ordered-equal to zero. The check lines expect
; vxorps + vcmpeqps into %k1, a vunpckhps writing ymm2 under {%k1}, and a vmovaps of
; ymm2 into ymm0.
1715define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1716; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2:
1717; CHECK:       # %bb.0:
1718; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1719; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
1720; CHECK-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1721; CHECK-NEXT:    vmovaps %ymm2, %ymm0
1722; CHECK-NEXT:    retq
1723  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1724  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1725  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1726  ret <8 x float> %res
1727}
1728
; Zero-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2. Lanes where %mask is
; ordered-equal to zero take %shuf; all other lanes are zero. The check lines expect
; vxorps + vcmpeqps into %k1 and a single vunpckhps writing ymm0 under {%k1} {z}.
1729define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1730; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
1731; CHECK:       # %bb.0:
1732; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1733; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1734; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1735; CHECK-NEXT:    retq
1736  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1737  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1738  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1739  ret <8 x float> %res
1740}
; Unmasked 256-bit unpack-high of floats, both sources in registers.
; The shuffle mask <2,10,3,11,6,14,7,15> interleaves elements 2,3 and 6,7 of %vec1 with the
; same-numbered elements of %vec2. The check lines expect a single vunpckhps on ymm
; registers followed by retq.
1741define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) {
1742; CHECK-LABEL: test_8xfloat_unpack_high_mask3:
1743; CHECK:       # %bb.0:
1744; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1745; CHECK-NEXT:    retq
1746  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1747  ret <8 x float> %res
1748}
; Merge-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2 (mask <2,10,3,11,6,14,7,15>).
; %cmp is true in lanes where %mask is ordered-equal to zero. The check lines expect
; vxorps + vcmpeqps into %k1, a vunpckhps writing ymm2 under {%k1}, and a vmovaps of
; ymm2 into ymm0.
1749define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1750; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3:
1751; CHECK:       # %bb.0:
1752; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1753; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
1754; CHECK-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1755; CHECK-NEXT:    vmovaps %ymm2, %ymm0
1756; CHECK-NEXT:    retq
1757  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1758  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1759  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1760  ret <8 x float> %res
1761}
1762
; Zero-masked 256-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 and %vec2. Lanes where %mask is
; ordered-equal to zero take %shuf; all other lanes are zero. The check lines expect
; vxorps + vcmpeqps into %k1 and a single vunpckhps writing ymm0 under {%k1} {z}.
1763define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1764; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
1765; CHECK:       # %bb.0:
1766; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1767; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1768; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1769; CHECK-NEXT:    retq
1770  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1771  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1772  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1773  ret <8 x float> %res
1774}
; Unmasked 256-bit unpack-high of floats with the second source loaded from memory.
; %vec2 is loaded from %vec2p, then the shuffle mask <2,10,3,11,6,14,7,15> interleaves
; elements 2,3 and 6,7 of %vec1 with the same-numbered elements of %vec2. The check lines
; expect one vunpckhps whose second source is a memory operand (mem[...]), then retq.
1775define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
1776; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask0:
1777; CHECK:       # %bb.0:
1778; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1779; CHECK-NEXT:    retq
1780  %vec2 = load <8 x float>, <8 x float>* %vec2p
1781  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1782  ret <8 x float> %res
1783}
; Merge-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. %cmp is true in lanes where %mask is ordered-equal to zero.
; The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps with a memory operand
; writing ymm1 under {%k1}, and a vmovaps of ymm1 into ymm0.
1784define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1785; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
1786; CHECK:       # %bb.0:
1787; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1788; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1789; CHECK-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1790; CHECK-NEXT:    vmovaps %ymm1, %ymm0
1791; CHECK-NEXT:    retq
1792  %vec2 = load <8 x float>, <8 x float>* %vec2p
1793  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1794  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1795  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1796  ret <8 x float> %res
1797}
1798
; Zero-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero take %shuf; all
; other lanes are zero. The check lines expect vxorps + vcmpeqps into %k1 and a single
; vunpckhps with a memory operand writing ymm0 under {%k1} {z}.
1799define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
1800; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
1801; CHECK:       # %bb.0:
1802; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1803; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1804; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1805; CHECK-NEXT:    retq
1806  %vec2 = load <8 x float>, <8 x float>* %vec2p
1807  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1808  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1809  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1810  ret <8 x float> %res
1811}
1812
; Merge-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. %cmp is true in lanes where %mask is ordered-equal to zero.
; The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps with a memory operand
; writing ymm1 under {%k1}, and a vmovaps of ymm1 into ymm0.
1813define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1814; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
1815; CHECK:       # %bb.0:
1816; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1817; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1818; CHECK-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1819; CHECK-NEXT:    vmovaps %ymm1, %ymm0
1820; CHECK-NEXT:    retq
1821  %vec2 = load <8 x float>, <8 x float>* %vec2p
1822  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1823  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1824  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1825  ret <8 x float> %res
1826}
1827
; Zero-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero take %shuf; all
; other lanes are zero. The check lines expect vxorps + vcmpeqps into %k1 and a single
; vunpckhps with a memory operand writing ymm0 under {%k1} {z}.
1828define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
1829; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
1830; CHECK:       # %bb.0:
1831; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1832; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1833; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1834; CHECK-NEXT:    retq
1835  %vec2 = load <8 x float>, <8 x float>* %vec2p
1836  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1837  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1838  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1839  ret <8 x float> %res
1840}
1841
; Merge-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. %cmp is true in lanes where %mask is ordered-equal to zero.
; The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps with a memory operand
; writing ymm1 under {%k1}, and a vmovaps of ymm1 into ymm0.
1842define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1843; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
1844; CHECK:       # %bb.0:
1845; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1846; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1847; CHECK-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1848; CHECK-NEXT:    vmovaps %ymm1, %ymm0
1849; CHECK-NEXT:    retq
1850  %vec2 = load <8 x float>, <8 x float>* %vec2p
1851  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1852  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1853  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1854  ret <8 x float> %res
1855}
1856
; Zero-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero take %shuf; all
; other lanes are zero. The check lines expect vxorps + vcmpeqps into %k1 and a single
; vunpckhps with a memory operand writing ymm0 under {%k1} {z}.
1857define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
1858; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
1859; CHECK:       # %bb.0:
1860; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1861; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1862; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1863; CHECK-NEXT:    retq
1864  %vec2 = load <8 x float>, <8 x float>* %vec2p
1865  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1866  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1867  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1868  ret <8 x float> %res
1869}
1870
; Unmasked 256-bit unpack-high of floats with the second source loaded from memory.
; %vec2 is loaded from %vec2p, then the shuffle mask <2,10,3,11,6,14,7,15> interleaves
; elements 2,3 and 6,7 of %vec1 with the same-numbered elements of %vec2. The check lines
; expect one vunpckhps whose second source is a memory operand (mem[...]), then retq.
1871define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
1872; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask3:
1873; CHECK:       # %bb.0:
1874; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1875; CHECK-NEXT:    retq
1876  %vec2 = load <8 x float>, <8 x float>* %vec2p
1877  %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1878  ret <8 x float> %res
1879}
; Merge-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. %cmp is true in lanes where %mask is ordered-equal to zero.
; The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps with a memory operand
; writing ymm1 under {%k1}, and a vmovaps of ymm1 into ymm0.
1880define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1881; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
1882; CHECK:       # %bb.0:
1883; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1884; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
1885; CHECK-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1886; CHECK-NEXT:    vmovaps %ymm1, %ymm0
1887; CHECK-NEXT:    retq
1888  %vec2 = load <8 x float>, <8 x float>* %vec2p
1889  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1890  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1891  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1892  ret <8 x float> %res
1893}
1894
; Zero-masked 256-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3 and 6,7 of %vec1 with the same-numbered elements of the
; vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero take %shuf; all
; other lanes are zero. The check lines expect vxorps + vcmpeqps into %k1 and a single
; vunpckhps with a memory operand writing ymm0 under {%k1} {z}.
1895define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
1896; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
1897; CHECK:       # %bb.0:
1898; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
1899; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
1900; CHECK-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1901; CHECK-NEXT:    retq
1902  %vec2 = load <8 x float>, <8 x float>* %vec2p
1903  %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1904  %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1905  %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1906  ret <8 x float> %res
1907}
1908
; Unmasked 512-bit unpack-high of floats, both sources in registers.
; The shuffle mask interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the
; same-numbered elements of %vec2 (indices 18,19, 22,23, 26,27, 30,31 in the mask).
; The check lines expect a single vunpckhps on zmm registers followed by retq.
1909define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) {
1910; CHECK-LABEL: test_16xfloat_unpack_high_mask0:
1911; CHECK:       # %bb.0:
1912; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1913; CHECK-NEXT:    retq
1914  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1915  ret <16 x float> %res
1916}
; Merge-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. %cmp is true
; in lanes where %mask is ordered-equal to zero. The check lines expect vxorps +
; vcmpeqps into %k1, a vunpckhps writing zmm2 under {%k1}, and a vmovaps of zmm2 into zmm0.
1917define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
1918; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask0:
1919; CHECK:       # %bb.0:
1920; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1921; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
1922; CHECK-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1923; CHECK-NEXT:    vmovaps %zmm2, %zmm0
1924; CHECK-NEXT:    retq
1925  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1926  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1927  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
1928  ret <16 x float> %res
1929}
1930
; Zero-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. Lanes where
; %mask is ordered-equal to zero take %shuf; all other lanes are zero. The check lines
; expect vxorps + vcmpeqps into %k1 and a single vunpckhps writing zmm0 under {%k1} {z}.
1931define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
1932; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
1933; CHECK:       # %bb.0:
1934; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1935; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
1936; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1937; CHECK-NEXT:    retq
1938  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1939  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1940  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1941  ret <16 x float> %res
1942}
; Merge-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. %cmp is true
; in lanes where %mask is ordered-equal to zero. The check lines expect vxorps +
; vcmpeqps into %k1, a vunpckhps writing zmm2 under {%k1}, and a vmovaps of zmm2 into zmm0.
1943define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
1944; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask1:
1945; CHECK:       # %bb.0:
1946; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1947; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
1948; CHECK-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1949; CHECK-NEXT:    vmovaps %zmm2, %zmm0
1950; CHECK-NEXT:    retq
1951  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1952  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1953  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
1954  ret <16 x float> %res
1955}
1956
; Zero-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. Lanes where
; %mask is ordered-equal to zero take %shuf; all other lanes are zero. The check lines
; expect vxorps + vcmpeqps into %k1 and a single vunpckhps writing zmm0 under {%k1} {z}.
1957define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
1958; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
1959; CHECK:       # %bb.0:
1960; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1961; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
1962; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1963; CHECK-NEXT:    retq
1964  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1965  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1966  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1967  ret <16 x float> %res
1968}
; Merge-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. %cmp is true
; in lanes where %mask is ordered-equal to zero. The check lines expect vxorps +
; vcmpeqps into %k1, a vunpckhps writing zmm2 under {%k1}, and a vmovaps of zmm2 into zmm0.
1969define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
1970; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask2:
1971; CHECK:       # %bb.0:
1972; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
1973; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
1974; CHECK-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1975; CHECK-NEXT:    vmovaps %zmm2, %zmm0
1976; CHECK-NEXT:    retq
1977  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1978  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
1979  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
1980  ret <16 x float> %res
1981}
1982
; Zero-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. Lanes where
; %mask is ordered-equal to zero take %shuf; all other lanes are zero. The check lines
; expect vxorps + vcmpeqps into %k1 and a single vunpckhps writing zmm0 under {%k1} {z}.
1983define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
1984; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
1985; CHECK:       # %bb.0:
1986; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
1987; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
1988; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1989; CHECK-NEXT:    retq
1990  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1991  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
1992  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1993  ret <16 x float> %res
1994}
; Unmasked 512-bit unpack-high of floats, both sources in registers.
; The shuffle mask interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the
; same-numbered elements of %vec2 (indices 18,19, 22,23, 26,27, 30,31 in the mask).
; The check lines expect a single vunpckhps on zmm registers followed by retq.
1995define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) {
1996; CHECK-LABEL: test_16xfloat_unpack_high_mask3:
1997; CHECK:       # %bb.0:
1998; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1999; CHECK-NEXT:    retq
2000  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2001  ret <16 x float> %res
2002}
; Merge-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. %cmp is true
; in lanes where %mask is ordered-equal to zero. The check lines expect vxorps +
; vcmpeqps into %k1, a vunpckhps writing zmm2 under {%k1}, and a vmovaps of zmm2 into zmm0.
2003define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
2004; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask3:
2005; CHECK:       # %bb.0:
2006; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
2007; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
2008; CHECK-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
2009; CHECK-NEXT:    vmovaps %zmm2, %zmm0
2010; CHECK-NEXT:    retq
2011  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2012  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
2013  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2014  ret <16 x float> %res
2015}
2016
; Zero-masked 512-bit unpack-high of floats, both sources in registers.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 and %vec2. Lanes where
; %mask is ordered-equal to zero take %shuf; all other lanes are zero. The check lines
; expect vxorps + vcmpeqps into %k1 and a single vunpckhps writing zmm0 under {%k1} {z}.
2017define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
2018; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
2019; CHECK:       # %bb.0:
2020; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2021; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
2022; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
2023; CHECK-NEXT:    retq
2024  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2025  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
2026  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2027  ret <16 x float> %res
2028}
; Unmasked 512-bit unpack-high of floats with the second source loaded from memory.
; %vec2 is loaded from %vec2p, then the shuffle interleaves elements 2,3, 6,7, 10,11 and
; 14,15 of %vec1 with the same-numbered elements of %vec2. The check lines expect one
; vunpckhps whose second source is a memory operand (mem[...]), then retq.
2029define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
2030; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask0:
2031; CHECK:       # %bb.0:
2032; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2033; CHECK-NEXT:    retq
2034  %vec2 = load <16 x float>, <16 x float>* %vec2p
2035  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2036  ret <16 x float> %res
2037}
; Merge-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. %cmp is true in lanes where %mask is
; ordered-equal to zero. The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps
; with a memory operand writing zmm1 under {%k1}, and a vmovaps of zmm1 into zmm0.
2038define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2039; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
2040; CHECK:       # %bb.0:
2041; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2042; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
2043; CHECK-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2044; CHECK-NEXT:    vmovaps %zmm1, %zmm0
2045; CHECK-NEXT:    retq
2046  %vec2 = load <16 x float>, <16 x float>* %vec2p
2047  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2048  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
2049  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2050  ret <16 x float> %res
2051}
2052
; Zero-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero
; take %shuf; all other lanes are zero. The check lines expect vxorps + vcmpeqps into
; %k1 and a single vunpckhps with a memory operand writing zmm0 under {%k1} {z}.
2053define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
2054; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
2055; CHECK:       # %bb.0:
2056; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
2057; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
2058; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2059; CHECK-NEXT:    retq
2060  %vec2 = load <16 x float>, <16 x float>* %vec2p
2061  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2062  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
2063  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2064  ret <16 x float> %res
2065}
2066
; Merge-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. %cmp is true in lanes where %mask is
; ordered-equal to zero. The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps
; with a memory operand writing zmm1 under {%k1}, and a vmovaps of zmm1 into zmm0.
2067define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2068; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
2069; CHECK:       # %bb.0:
2070; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2071; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
2072; CHECK-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2073; CHECK-NEXT:    vmovaps %zmm1, %zmm0
2074; CHECK-NEXT:    retq
2075  %vec2 = load <16 x float>, <16 x float>* %vec2p
2076  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2077  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
2078  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2079  ret <16 x float> %res
2080}
2081
; Zero-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero
; take %shuf; all other lanes are zero. The check lines expect vxorps + vcmpeqps into
; %k1 and a single vunpckhps with a memory operand writing zmm0 under {%k1} {z}.
2082define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
2083; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
2084; CHECK:       # %bb.0:
2085; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
2086; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
2087; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2088; CHECK-NEXT:    retq
2089  %vec2 = load <16 x float>, <16 x float>* %vec2p
2090  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2091  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
2092  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2093  ret <16 x float> %res
2094}
2095
; Merge-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. %cmp is true in lanes where %mask is
; ordered-equal to zero. The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps
; with a memory operand writing zmm1 under {%k1}, and a vmovaps of zmm1 into zmm0.
2096define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2097; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
2098; CHECK:       # %bb.0:
2099; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2100; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
2101; CHECK-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2102; CHECK-NEXT:    vmovaps %zmm1, %zmm0
2103; CHECK-NEXT:    retq
2104  %vec2 = load <16 x float>, <16 x float>* %vec2p
2105  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2106  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
2107  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2108  ret <16 x float> %res
2109}
2110
; Zero-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. Lanes where %mask is ordered-equal to zero
; take %shuf; all other lanes are zero. The check lines expect vxorps + vcmpeqps into
; %k1 and a single vunpckhps with a memory operand writing zmm0 under {%k1} {z}.
2111define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
2112; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
2113; CHECK:       # %bb.0:
2114; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
2115; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
2116; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2117; CHECK-NEXT:    retq
2118  %vec2 = load <16 x float>, <16 x float>* %vec2p
2119  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2120  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise produce zero.
2121  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2122  ret <16 x float> %res
2123}
2124
; Unmasked 512-bit unpack-high of floats with the second source loaded from memory.
; %vec2 is loaded from %vec2p, then the shuffle interleaves elements 2,3, 6,7, 10,11 and
; 14,15 of %vec1 with the same-numbered elements of %vec2. The check lines expect one
; vunpckhps whose second source is a memory operand (mem[...]), then retq.
2125define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
2126; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask3:
2127; CHECK:       # %bb.0:
2128; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2129; CHECK-NEXT:    retq
2130  %vec2 = load <16 x float>, <16 x float>* %vec2p
2131  %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2132  ret <16 x float> %res
2133}
; Merge-masked 512-bit unpack-high of floats with the second source loaded from memory.
; %shuf interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same-numbered
; elements of the vector loaded from %vec2p. %cmp is true in lanes where %mask is
; ordered-equal to zero. The check lines expect vxorps + vcmpeqps into %k1, a vunpckhps
; with a memory operand writing zmm1 under {%k1}, and a vmovaps of zmm1 into zmm0.
2134define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2135; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
2136; CHECK:       # %bb.0:
2137; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
2138; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
2139; CHECK-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2140; CHECK-NEXT:    vmovaps %zmm1, %zmm0
2141; CHECK-NEXT:    retq
2142  %vec2 = load <16 x float>, <16 x float>* %vec2p
2143  %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2144  %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
  ; Keep %shuf where %cmp is true; otherwise pass the %vec3 lane through.
2145  %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2146  ret <16 x float> %res
2147}
2148
; Zero-masked unpack-high of <16 x float> with a memory second operand:
; lanes where %mask compares oeq to zero take the shuffle result, every other
; lane is zeroed.
define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
; CHECK-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
; CHECK-NEXT:    retq
  %rhs = load <16 x float>, <16 x float>* %vec2p
  %hi = shufflevector <16 x float> %vec1, <16 x float> %rhs, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
  %is_zero = fcmp oeq <16 x float> %mask, zeroinitializer
  %zeroed = select <16 x i1> %is_zero, <16 x float> %hi, <16 x float> zeroinitializer
  ret <16 x float> %zeroed
}
2162
; Unmasked unpack-high of two <2 x double> registers: the result is
; { %vec1[1], %vec2[1] }, expected as a single vunpckhpd.
define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) {
; CHECK-LABEL: test_2xdouble_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %hi = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %hi
}
; Merge-masked unpack-high of two <2 x double> registers: lanes where %mask
; compares oeq to zero take the shuffle result, the rest keep %vec3.
define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    retq
  %hi = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %merged = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> %vec3
  ret <2 x double> %merged
}
2184
; Zero-masked unpack-high of two <2 x double> registers: lanes where %mask
; compares oeq to zero take the shuffle result, the rest are zeroed.
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %hi = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %zeroed = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> zeroinitializer
  ret <2 x double> %zeroed
}
; Merge-masked <2 x double> unpack-high (register operands): the select picks
; the shuffle where %mask is oeq to zero and %vec3 elsewhere.
define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %xmm4, %xmm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    retq
  %hi = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %merged = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> %vec3
  ret <2 x double> %merged
}
2210
; Zero-masked <2 x double> unpack-high (register operands): the select picks
; the shuffle where %mask is oeq to zero and zero elsewhere.
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; CHECK-NEXT:    retq
  %hi = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %zeroed = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> zeroinitializer
  ret <2 x double> %zeroed
}
; Unmasked <2 x double> unpack-high with the second operand loaded from
; %vec2p; the expected output uses the memory form of vunpckhpd.
define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
; CHECK-LABEL: test_2xdouble_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %vec2p
  %hi = shufflevector <2 x double> %vec1, <2 x double> %rhs, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %hi
}
; Merge-masked <2 x double> unpack-high with a memory second operand: lanes
; where %mask compares oeq to zero take the shuffle, the rest keep %vec3.
define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %vec2p
  %hi = shufflevector <2 x double> %vec1, <2 x double> %rhs, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %merged = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> %vec3
  ret <2 x double> %merged
}
2246
; Zero-masked <2 x double> unpack-high with a memory second operand: lanes
; where %mask compares oeq to zero take the shuffle, the rest are zeroed.
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %vec2p
  %hi = shufflevector <2 x double> %vec1, <2 x double> %rhs, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %zeroed = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> zeroinitializer
  ret <2 x double> %zeroed
}
2260
; Merge-masked <2 x double> unpack-high, second operand from memory: the
; select yields the shuffle where %mask is oeq to zero and %vec3 otherwise.
define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %vec2p
  %hi = shufflevector <2 x double> %vec1, <2 x double> %rhs, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %merged = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> %vec3
  ret <2 x double> %merged
}
2275
; Zero-masked <2 x double> unpack-high, second operand from memory: the
; select yields the shuffle where %mask is oeq to zero and zero otherwise.
define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %xmm2, %xmm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
; CHECK-NEXT:    retq
  %rhs = load <2 x double>, <2 x double>* %vec2p
  %hi = shufflevector <2 x double> %vec1, <2 x double> %rhs, <2 x i32> <i32 1, i32 3>
  %is_zero = fcmp oeq <2 x double> %mask, zeroinitializer
  %zeroed = select <2 x i1> %is_zero, <2 x double> %hi, <2 x double> zeroinitializer
  ret <2 x double> %zeroed
}
2289
; Unmasked unpack-high of two <4 x double> registers (indices 1,5,3,7),
; expected as a single ymm vunpckhpd.
define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %hi
}
; Merge-masked unpack-high of two <4 x double> registers: lanes where %mask
; compares oeq to zero take the shuffle result, the rest keep %vec3.
define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2311
; Zero-masked unpack-high of two <4 x double> registers: lanes where %mask
; compares oeq to zero take the shuffle result, the rest are zeroed.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
; Merge-masked <4 x double> unpack-high (register operands): the select picks
; the shuffle where %mask is oeq to zero and %vec3 elsewhere.
define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2337
; Zero-masked <4 x double> unpack-high (register operands): the select picks
; the shuffle where %mask is oeq to zero and zero elsewhere.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
; Merge-masked <4 x double> unpack-high on registers; the expected output
; folds the select into vunpckhpd under {%k1} and then copies ymm2 to ymm0.
define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2363
; Zero-masked <4 x double> unpack-high on registers; the expected output
; folds the select into vunpckhpd under {%k1} {z}.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
; Unmasked <4 x double> unpack-high on registers; no select is involved, so
; the expected output is a bare vunpckhpd followed by the return.
define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_unpack_high_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %hi
}
; Merge-masked <4 x double> unpack-high on registers: shuffle result where
; %mask is oeq to zero, %vec3 in the remaining lanes.
define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2397
; Zero-masked <4 x double> unpack-high on registers: shuffle result where
; %mask is oeq to zero, zero in the remaining lanes.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    retq
  %hi = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
; Unmasked <4 x double> unpack-high with the second operand loaded from
; %vec2p; the expected output uses the memory form of vunpckhpd.
define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %hi
}
; Merge-masked <4 x double> unpack-high with a memory second operand: lanes
; where %mask compares oeq to zero take the shuffle, the rest keep %vec3.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2433
; Zero-masked <4 x double> unpack-high with a memory second operand: lanes
; where %mask compares oeq to zero take the shuffle, the rest are zeroed.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
2447
; Merge-masked <4 x double> unpack-high, second operand from memory: the
; select yields the shuffle where %mask is oeq to zero and %vec3 otherwise.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2462
; Zero-masked <4 x double> unpack-high, second operand from memory: the
; select yields the shuffle where %mask is oeq to zero and zero otherwise.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
2476
; Merge-masked <4 x double> unpack-high with a loaded second operand; the
; expected output folds both the load and the select into vunpckhpd {%k1}.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2491
; Zero-masked <4 x double> unpack-high with a loaded second operand; the
; expected output folds both the load and the select into vunpckhpd {%k1} {z}.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
2505
; Unmasked <4 x double> unpack-high against a vector loaded from %vec2p;
; expected as one vunpckhpd with a memory operand.
define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %hi
}
; Merge-masked <4 x double> unpack-high against a loaded vector: shuffle
; result where %mask is oeq to zero, %vec3 in the remaining lanes.
define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %merged = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> %vec3
  ret <4 x double> %merged
}
2529
; Zero-masked <4 x double> unpack-high against a loaded vector: shuffle
; result where %mask is oeq to zero, zero in the remaining lanes.
define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT:    retq
  %rhs = load <4 x double>, <4 x double>* %vec2p
  %hi = shufflevector <4 x double> %vec1, <4 x double> %rhs, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %is_zero = fcmp oeq <4 x double> %mask, zeroinitializer
  %zeroed = select <4 x i1> %is_zero, <4 x double> %hi, <4 x double> zeroinitializer
  ret <4 x double> %zeroed
}
2543
; Unmasked unpack-high of two <8 x double> registers (indices
; 1,9,3,11,5,13,7,15), expected as a single zmm vunpckhpd.
define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %hi
}
; Merge-masked unpack-high of two <8 x double> registers: lanes where %mask
; compares oeq to zero take the shuffle result, the rest keep %vec3.
define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> %vec3
  ret <8 x double> %merged
}
2565
; Zero-masked unpack-high of two <8 x double> registers: lanes where %mask
; compares oeq to zero take the shuffle result, the rest are zeroed.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
; Merge-masked <8 x double> unpack-high (register operands): the select picks
; the shuffle where %mask is oeq to zero and %vec3 elsewhere.
define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> %vec3
  ret <8 x double> %merged
}
2591
; Zero-masked <8 x double> unpack-high (register operands): the select picks
; the shuffle where %mask is oeq to zero and zero elsewhere.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
; Merge-masked <8 x double> unpack-high on registers; the expected output
; folds the select into vunpckhpd under {%k1} and then copies zmm2 to zmm0.
define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> %vec3
  ret <8 x double> %merged
}
2617
; Zero-masked <8 x double> unpack-high on registers; the expected output
; folds the select into vunpckhpd under {%k1} {z}.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
; Unmasked <8 x double> unpack-high on registers; no select is involved, so
; the expected output is a bare vunpckhpd followed by the return.
define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; CHECK-LABEL: test_8xdouble_unpack_high_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %hi
}
; Merge-masked <8 x double> unpack-high on registers: shuffle result where
; %mask is oeq to zero, %vec3 in the remaining lanes.
define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> %vec3
  ret <8 x double> %merged
}
2651
; Zero-masked <8 x double> unpack-high on registers: shuffle result where
; %mask is oeq to zero, zero in the remaining lanes.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT:    retq
  %hi = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
; Unmasked <8 x double> unpack-high with the second operand loaded from
; %vec2p; the expected output uses the memory form of vunpckhpd.
define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %hi = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %hi
}
; Merge-masked <8 x double> unpack-high with a memory second operand: lanes
; where %mask compares oeq to zero take the shuffle, the rest keep %vec3.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %hi = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> %vec3
  ret <8 x double> %merged
}
2687
; Zero-masked <8 x double> unpack-high with a memory second operand: lanes
; where %mask compares oeq to zero take the shuffle, the rest are zeroed.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %hi = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %zeroed = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}
2701
; Merge-masked <8 x double> unpack-high, second operand from memory: the
; select yields the shuffle where %mask is oeq to zero and %vec3 otherwise.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %hi = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is_zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %merged = select <8 x i1> %is_zero, <8 x double> %hi, <8 x double> %vec3
  ret <8 x double> %merged
}
2716
; Zero-masked vunpckhpd (512-bit) whose second operand comes from a load.
; A lane is active when the matching %mask element is ordered-equal to zero;
; inactive lanes produce zero.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %loaded = load <8 x double>, <8 x double>* %vec2p
  %interleaved = shufflevector <8 x double> %vec1, <8 x double> %loaded, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %active = fcmp oeq <8 x double> %mask, zeroinitializer
  %blend = select <8 x i1> %active, <8 x double> %interleaved, <8 x double> zeroinitializer
  ret <8 x double> %blend
}
2730
; Expects the load, the odd-element interleave of %vec1 with the loaded vector,
; and the select against %vec3 to fold into a single merge-masked vunpckhpd
; with a memory operand. The k-register comes from comparing %mask with zero.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %hi.pairs = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is.zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %result = select <8 x i1> %is.zero, <8 x double> %hi.pairs, <8 x double> %vec3
  ret <8 x double> %result
}
2745
; Expects the load, the odd-element interleave of %vec1 with the loaded vector,
; and the select against zero to fold into a single zero-masked ({z})
; vunpckhpd with a memory operand. The k-register comes from comparing %mask
; with zero.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %rhs = load <8 x double>, <8 x double>* %vec2p
  %hi.pairs = shufflevector <8 x double> %vec1, <8 x double> %rhs, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %is.zero = fcmp oeq <8 x double> %mask, zeroinitializer
  %result = select <8 x i1> %is.zero, <8 x double> %hi.pairs, <8 x double> zeroinitializer
  ret <8 x double> %result
}
2759
; Unmasked baseline: interleaving the odd-indexed doubles of %vec1 with those
; of the vector loaded from %vec2p should select a single vunpckhpd that reads
; its second source directly from memory.
define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %from.mem = load <8 x double>, <8 x double>* %vec2p
  %unpacked = shufflevector <8 x double> %vec1, <8 x double> %from.mem, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x double> %unpacked
}
; Merge-masking form of the memory-operand high unpack: the select keeps the
; shuffled value in lanes where %mask is ordered-equal to zero and falls back
; to %vec3 elsewhere, so the unpack writes into %vec3's register under {%k1}.
define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %from.mem = load <8 x double>, <8 x double>* %vec2p
  %unpacked = shufflevector <8 x double> %vec1, <8 x double> %from.mem, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %write.en = fcmp oeq <8 x double> %mask, zeroinitializer
  %final = select <8 x i1> %write.en, <8 x double> %unpacked, <8 x double> %vec3
  ret <8 x double> %final
}
2783
; Zero-masking form of the memory-operand high unpack: the select keeps the
; shuffled value in lanes where %mask is ordered-equal to zero and yields zero
; elsewhere, so the unpack is emitted with {%k1} {z}.
define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
; CHECK-NEXT:    retq
  %from.mem = load <8 x double>, <8 x double>* %vec2p
  %unpacked = shufflevector <8 x double> %vec1, <8 x double> %from.mem, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %write.en = fcmp oeq <8 x double> %mask, zeroinitializer
  %final = select <8 x i1> %write.en, <8 x double> %unpacked, <8 x double> zeroinitializer
  ret <8 x double> %final
}
2797
2798