1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
3
4
; Extract elements 16-23 of a <32 x i16> (bits 256-383, i.e. 128-bit lane 2) and
; return them as <8 x i16>. The expected assembly is a single vextractf32x4 with
; immediate 2.
5define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
6; SKX-LABEL: extract_subvector128_v32i16:
7; SKX:       ## %bb.0:
8; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
9; SKX-NEXT:    vzeroupper
10; SKX-NEXT:    retq
11  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
12  ret <8 x i16> %r1
13}
14
; Extract elements 0-7 (the lowest 128 bits) of a <32 x i16>. The expected
; assembly contains no extract instruction, only a register kill annotation
; followed by vzeroupper and the return.
15define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
16; SKX-LABEL: extract_subvector128_v32i16_first_element:
17; SKX:       ## %bb.0:
18; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
19; SKX-NEXT:    vzeroupper
20; SKX-NEXT:    retq
21  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
22  ret <8 x i16> %r1
23}
24
; Extract elements 32-47 of a <64 x i8> (bits 256-383, i.e. 128-bit lane 2) and
; return them as <16 x i8>. The expected assembly is a single vextractf32x4 with
; immediate 2.
25define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
26; SKX-LABEL: extract_subvector128_v64i8:
27; SKX:       ## %bb.0:
28; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
29; SKX-NEXT:    vzeroupper
30; SKX-NEXT:    retq
31  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
32  ret <16 x i8> %r1
33}
34
; Extract elements 0-15 (the lowest 128 bits) of a <64 x i8>. The expected
; assembly contains no extract instruction, only a register kill annotation
; followed by vzeroupper and the return.
35define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
36; SKX-LABEL: extract_subvector128_v64i8_first_element:
37; SKX:       ## %bb.0:
38; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
39; SKX-NEXT:    vzeroupper
40; SKX-NEXT:    retq
41  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
42  ret <16 x i8> %r1
43}
44
45
; Extract elements 16-31 (the upper 256 bits) of a <32 x i16> and return them as
; <16 x i16>. The expected assembly is a single vextractf64x4 with immediate 1.
46define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
47; SKX-LABEL: extract_subvector256_v32i16:
48; SKX:       ## %bb.0:
49; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
50; SKX-NEXT:    retq
51  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
52  ret <16 x i16> %r1
53}
54
; Extract elements 32-63 (the upper 256 bits) of a <64 x i8> and return them as
; <32 x i8>. The expected assembly is a single vextractf64x4 with immediate 1.
55define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
56; SKX-LABEL: extract_subvector256_v64i8:
57; SKX:       ## %bb.0:
58; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
59; SKX-NEXT:    retq
60  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
61  ret <32 x i8> %r1
62}
63
; Store the upper two doubles (elements 2-3) of a <4 x double> to %addr with
; align 1. The expected assembly extracts straight to memory with
; vextractf128 $1.
; NOTE(review): the function name says v8f64 although the operand is
; <4 x double> - confirm whether the name is intentional.
64define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
65; SKX-LABEL: extract_subvector256_v8f64_store:
66; SKX:       ## %bb.0: ## %entry
67; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
68; SKX-NEXT:    vzeroupper
69; SKX-NEXT:    retq
70entry:
71  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
72  %1 = bitcast double* %addr to <2 x double>*
73  store <2 x double> %0, <2 x double>* %1, align 1
74  ret void
75}
76
; Store the upper four floats (elements 4-7) of an <8 x float> to %addr with
; align 1. The expected assembly extracts straight to memory with
; vextractf128 $1.
77define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
78; SKX-LABEL: extract_subvector256_v8f32_store:
79; SKX:       ## %bb.0: ## %entry
80; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
81; SKX-NEXT:    vzeroupper
82; SKX-NEXT:    retq
83entry:
84  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
85  %1 = bitcast float* %addr to <4 x float>*
86  store <4 x float> %0, <4 x float>* %1, align 1
87  ret void
88}
89
; Store the upper two i64 elements (2-3) of a <4 x i64> to %addr with align 1.
; The expected assembly uses vextractf128 $1 to memory even though the element
; type is integer.
90define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
91; SKX-LABEL: extract_subvector256_v4i64_store:
92; SKX:       ## %bb.0: ## %entry
93; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
94; SKX-NEXT:    vzeroupper
95; SKX-NEXT:    retq
96entry:
97  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
98  %1 = bitcast i64* %addr to <2 x i64>*
99  store <2 x i64> %0, <2 x i64>* %1, align 1
100  ret void
101}
102
; Store the upper four i32 elements (4-7) of an <8 x i32> to %addr with align 1.
; The expected assembly uses vextractf128 $1 to memory even though the element
; type is integer.
103define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
104; SKX-LABEL: extract_subvector256_v8i32_store:
105; SKX:       ## %bb.0: ## %entry
106; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
107; SKX-NEXT:    vzeroupper
108; SKX-NEXT:    retq
109entry:
110  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
111  %1 = bitcast i32* %addr to <4 x i32>*
112  store <4 x i32> %0, <4 x i32>* %1, align 1
113  ret void
114}
115
; Store the upper eight i16 elements (8-15) of a <16 x i16> to %addr with
; align 1. The expected assembly uses vextractf128 $1 to memory even though the
; element type is integer.
116define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
117; SKX-LABEL: extract_subvector256_v16i16_store:
118; SKX:       ## %bb.0: ## %entry
119; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
120; SKX-NEXT:    vzeroupper
121; SKX-NEXT:    retq
122entry:
123  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
124  %1 = bitcast i16* %addr to <8 x i16>*
125  store <8 x i16> %0, <8 x i16>* %1, align 1
126  ret void
127}
128
; Store the upper sixteen i8 elements (16-31) of a <32 x i8> to %addr with
; align 1. The expected assembly uses vextractf128 $1 to memory even though the
; element type is integer.
129define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
130; SKX-LABEL: extract_subvector256_v32i8_store:
131; SKX:       ## %bb.0: ## %entry
132; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
133; SKX-NEXT:    vzeroupper
134; SKX-NEXT:    retq
135entry:
136  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
137  %1 = bitcast i8* %addr to <16 x i8>*
138  store <16 x i8> %0, <16 x i8>* %1, align 1
139  ret void
140}
141
; Store the low two doubles (elements 0-1) of a <4 x double> to %addr with
; align 1. The expected assembly is a plain unaligned vmovups of xmm0; no
; extract instruction is expected.
142define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
143; SKX-LABEL: extract_subvector256_v4f64_store_lo:
144; SKX:       ## %bb.0: ## %entry
145; SKX-NEXT:    vmovups %xmm0, (%rdi)
146; SKX-NEXT:    vzeroupper
147; SKX-NEXT:    retq
148entry:
149  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
150  %1 = bitcast double* %addr to <2 x double>*
151  store <2 x double> %0, <2 x double>* %1, align 1
152  ret void
153}
154
; Store the low two doubles (elements 0-1) of a <4 x double> to %addr with
; align 16. With the 16-byte alignment the expected assembly is the aligned
; vmovaps of xmm0.
155define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
156; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
157; SKX:       ## %bb.0: ## %entry
158; SKX-NEXT:    vmovaps %xmm0, (%rdi)
159; SKX-NEXT:    vzeroupper
160; SKX-NEXT:    retq
161entry:
162  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
163  %1 = bitcast double* %addr to <2 x double>*
164  store <2 x double> %0, <2 x double>* %1, align 16
165  ret void
166}
167
; Store the low four floats (elements 0-3) of an <8 x float> to %addr with
; align 1. The expected assembly is a plain unaligned vmovups of xmm0.
168define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
169; SKX-LABEL: extract_subvector256_v4f32_store_lo:
170; SKX:       ## %bb.0: ## %entry
171; SKX-NEXT:    vmovups %xmm0, (%rdi)
172; SKX-NEXT:    vzeroupper
173; SKX-NEXT:    retq
174entry:
175  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
176  %1 = bitcast float* %addr to <4 x float>*
177  store <4 x float> %0, <4 x float>* %1, align 1
178  ret void
179}
180
; Store the low four floats (elements 0-3) of an <8 x float> to %addr with
; align 16. With the 16-byte alignment the expected assembly is the aligned
; vmovaps of xmm0.
181define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
182; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
183; SKX:       ## %bb.0: ## %entry
184; SKX-NEXT:    vmovaps %xmm0, (%rdi)
185; SKX-NEXT:    vzeroupper
186; SKX-NEXT:    retq
187entry:
188  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
189  %1 = bitcast float* %addr to <4 x float>*
190  store <4 x float> %0, <4 x float>* %1, align 16
191  ret void
192}
193
; Store the low two i64 elements (0-1) of a <4 x i64> to %addr with align 1.
; The expected assembly is an unaligned vmovups of xmm0 even though the element
; type is integer.
194define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
195; SKX-LABEL: extract_subvector256_v2i64_store_lo:
196; SKX:       ## %bb.0: ## %entry
197; SKX-NEXT:    vmovups %xmm0, (%rdi)
198; SKX-NEXT:    vzeroupper
199; SKX-NEXT:    retq
200entry:
201  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
202  %1 = bitcast i64* %addr to <2 x i64>*
203  store <2 x i64> %0, <2 x i64>* %1, align 1
204  ret void
205}
206
; Store the low two i64 elements (0-1) of a <4 x i64> to %addr with align 16.
; With the 16-byte alignment the expected assembly is the aligned vmovaps of
; xmm0.
207define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
208; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
209; SKX:       ## %bb.0: ## %entry
210; SKX-NEXT:    vmovaps %xmm0, (%rdi)
211; SKX-NEXT:    vzeroupper
212; SKX-NEXT:    retq
213entry:
214  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
215  %1 = bitcast i64* %addr to <2 x i64>*
216  store <2 x i64> %0, <2 x i64>* %1, align 16
217  ret void
218}
219
; Store the low four i32 elements (0-3) of an <8 x i32> to %addr with align 1.
; The expected assembly is an unaligned vmovups of xmm0 even though the element
; type is integer.
220define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
221; SKX-LABEL: extract_subvector256_v4i32_store_lo:
222; SKX:       ## %bb.0: ## %entry
223; SKX-NEXT:    vmovups %xmm0, (%rdi)
224; SKX-NEXT:    vzeroupper
225; SKX-NEXT:    retq
226entry:
227  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
228  %1 = bitcast i32* %addr to <4 x i32>*
229  store <4 x i32> %0, <4 x i32>* %1, align 1
230  ret void
231}
232
; Store the low four i32 elements (0-3) of an <8 x i32> to %addr with align 16.
; With the 16-byte alignment the expected assembly is the aligned vmovaps of
; xmm0.
233define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
234; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
235; SKX:       ## %bb.0: ## %entry
236; SKX-NEXT:    vmovaps %xmm0, (%rdi)
237; SKX-NEXT:    vzeroupper
238; SKX-NEXT:    retq
239entry:
240  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
241  %1 = bitcast i32* %addr to <4 x i32>*
242  store <4 x i32> %0, <4 x i32>* %1, align 16
243  ret void
244}
245
; Store the low eight i16 elements (0-7) of a <16 x i16> to %addr with align 1.
; The expected assembly is an unaligned vmovups of xmm0 even though the element
; type is integer.
246define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
247; SKX-LABEL: extract_subvector256_v8i16_store_lo:
248; SKX:       ## %bb.0: ## %entry
249; SKX-NEXT:    vmovups %xmm0, (%rdi)
250; SKX-NEXT:    vzeroupper
251; SKX-NEXT:    retq
252entry:
253  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
254  %1 = bitcast i16* %addr to <8 x i16>*
255  store <8 x i16> %0, <8 x i16>* %1, align 1
256  ret void
257}
258
; Store the low eight i16 elements (0-7) of a <16 x i16> to %addr with align 16.
; With the 16-byte alignment the expected assembly is the aligned vmovaps of
; xmm0.
259define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
260; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
261; SKX:       ## %bb.0: ## %entry
262; SKX-NEXT:    vmovaps %xmm0, (%rdi)
263; SKX-NEXT:    vzeroupper
264; SKX-NEXT:    retq
265entry:
266  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
267  %1 = bitcast i16* %addr to <8 x i16>*
268  store <8 x i16> %0, <8 x i16>* %1, align 16
269  ret void
270}
271
; Store the low sixteen i8 elements (0-15) of a <32 x i8> to %addr with align 1.
; The expected assembly is an unaligned vmovups of xmm0 even though the element
; type is integer.
272define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
273; SKX-LABEL: extract_subvector256_v16i8_store_lo:
274; SKX:       ## %bb.0: ## %entry
275; SKX-NEXT:    vmovups %xmm0, (%rdi)
276; SKX-NEXT:    vzeroupper
277; SKX-NEXT:    retq
278entry:
279  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
280  %1 = bitcast i8* %addr to <16 x i8>*
281  store <16 x i8> %0, <16 x i8>* %1, align 1
282  ret void
283}
284
; Store the low sixteen i8 elements (0-15) of a <32 x i8> to %addr with
; align 16. With the 16-byte alignment the expected assembly is the aligned
; vmovaps of xmm0.
285define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
286; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
287; SKX:       ## %bb.0: ## %entry
288; SKX-NEXT:    vmovaps %xmm0, (%rdi)
289; SKX-NEXT:    vzeroupper
290; SKX-NEXT:    retq
291entry:
292  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
293  %1 = bitcast i8* %addr to <16 x i8>*
294  store <16 x i8> %0, <16 x i8>* %1, align 16
295  ret void
296}
297
; Store the low two doubles (elements 0-1, the lowest 128 bits) of an
; <8 x double> to %addr with align 1. The expected assembly is an unaligned
; vmovups of xmm0.
298define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
299; SKX-LABEL: extract_subvector512_v2f64_store_lo:
300; SKX:       ## %bb.0: ## %entry
301; SKX-NEXT:    vmovups %xmm0, (%rdi)
302; SKX-NEXT:    vzeroupper
303; SKX-NEXT:    retq
304entry:
305  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
306  %1 = bitcast double* %addr to <2 x double>*
307  store <2 x double> %0, <2 x double>* %1, align 1
308  ret void
309}
310
; Store the low two doubles (elements 0-1, the lowest 128 bits) of an
; <8 x double> to %addr with align 16. The expected assembly is the aligned
; vmovaps of xmm0.
311define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
312; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
313; SKX:       ## %bb.0: ## %entry
314; SKX-NEXT:    vmovaps %xmm0, (%rdi)
315; SKX-NEXT:    vzeroupper
316; SKX-NEXT:    retq
317entry:
318  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
319  %1 = bitcast double* %addr to <2 x double>*
320  store <2 x double> %0, <2 x double>* %1, align 16
321  ret void
322}
323
; Store the low four floats (elements 0-3, the lowest 128 bits) of a
; <16 x float> to %addr with align 1. The expected assembly is an unaligned
; vmovups of xmm0.
324define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
325; SKX-LABEL: extract_subvector512_v4f32_store_lo:
326; SKX:       ## %bb.0: ## %entry
327; SKX-NEXT:    vmovups %xmm0, (%rdi)
328; SKX-NEXT:    vzeroupper
329; SKX-NEXT:    retq
330entry:
331  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
332  %1 = bitcast float* %addr to <4 x float>*
333  store <4 x float> %0, <4 x float>* %1, align 1
334  ret void
335}
336
; Store the low four floats (elements 0-3, the lowest 128 bits) of a
; <16 x float> to %addr with align 16. The expected assembly is the aligned
; vmovaps of xmm0.
337define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
338; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
339; SKX:       ## %bb.0: ## %entry
340; SKX-NEXT:    vmovaps %xmm0, (%rdi)
341; SKX-NEXT:    vzeroupper
342; SKX-NEXT:    retq
343entry:
344  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
345  %1 = bitcast float* %addr to <4 x float>*
346  store <4 x float> %0, <4 x float>* %1, align 16
347  ret void
348}
349
; Store the low two i64 elements (0-1, the lowest 128 bits) of an <8 x i64> to
; %addr with align 1. The expected assembly is an unaligned vmovups of xmm0 even
; though the element type is integer.
350define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
351; SKX-LABEL: extract_subvector512_v2i64_store_lo:
352; SKX:       ## %bb.0: ## %entry
353; SKX-NEXT:    vmovups %xmm0, (%rdi)
354; SKX-NEXT:    vzeroupper
355; SKX-NEXT:    retq
356entry:
357  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
358  %1 = bitcast i64* %addr to <2 x i64>*
359  store <2 x i64> %0, <2 x i64>* %1, align 1
360  ret void
361}
362
; Store the low two i64 elements (0-1, the lowest 128 bits) of an <8 x i64> to
; %addr with align 16. The expected assembly is the aligned vmovaps of xmm0.
363define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
364; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
365; SKX:       ## %bb.0: ## %entry
366; SKX-NEXT:    vmovaps %xmm0, (%rdi)
367; SKX-NEXT:    vzeroupper
368; SKX-NEXT:    retq
369entry:
370  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
371  %1 = bitcast i64* %addr to <2 x i64>*
372  store <2 x i64> %0, <2 x i64>* %1, align 16
373  ret void
374}
375
; Store the low four i32 elements (0-3, the lowest 128 bits) of a <16 x i32> to
; %addr with align 1. The expected assembly is an unaligned vmovups of xmm0 even
; though the element type is integer.
376define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
377; SKX-LABEL: extract_subvector512_v4i32_store_lo:
378; SKX:       ## %bb.0: ## %entry
379; SKX-NEXT:    vmovups %xmm0, (%rdi)
380; SKX-NEXT:    vzeroupper
381; SKX-NEXT:    retq
382entry:
383  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
384  %1 = bitcast i32* %addr to <4 x i32>*
385  store <4 x i32> %0, <4 x i32>* %1, align 1
386  ret void
387}
388
; Store the low four i32 elements (0-3, the lowest 128 bits) of a <16 x i32> to
; %addr with align 16. The expected assembly is the aligned vmovaps of xmm0.
389define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
390; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
391; SKX:       ## %bb.0: ## %entry
392; SKX-NEXT:    vmovaps %xmm0, (%rdi)
393; SKX-NEXT:    vzeroupper
394; SKX-NEXT:    retq
395entry:
396  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
397  %1 = bitcast i32* %addr to <4 x i32>*
398  store <4 x i32> %0, <4 x i32>* %1, align 16
399  ret void
400}
401
; Store the low eight i16 elements (0-7, the lowest 128 bits) of a <32 x i16> to
; %addr with align 1. The expected assembly is an unaligned vmovups of xmm0 even
; though the element type is integer.
402define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
403; SKX-LABEL: extract_subvector512_v8i16_store_lo:
404; SKX:       ## %bb.0: ## %entry
405; SKX-NEXT:    vmovups %xmm0, (%rdi)
406; SKX-NEXT:    vzeroupper
407; SKX-NEXT:    retq
408entry:
409  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
410  %1 = bitcast i16* %addr to <8 x i16>*
411  store <8 x i16> %0, <8 x i16>* %1, align 1
412  ret void
413}
414
; Store the low sixteen i8 elements (0-15, the lowest 128 bits) of a <64 x i8>
; to %addr with align 1. The expected assembly is an unaligned vmovups of xmm0
; even though the element type is integer.
415define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
416; SKX-LABEL: extract_subvector512_v16i8_store_lo:
417; SKX:       ## %bb.0: ## %entry
418; SKX-NEXT:    vmovups %xmm0, (%rdi)
419; SKX-NEXT:    vzeroupper
420; SKX-NEXT:    retq
421entry:
422  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
423  %1 = bitcast i8* %addr to <16 x i8>*
424  store <16 x i8> %0, <16 x i8>* %1, align 1
425  ret void
426}
427
; Store the low sixteen i8 elements (0-15, the lowest 128 bits) of a <64 x i8>
; to %addr with align 16. The expected assembly is the aligned vmovaps of xmm0.
428define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
429; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
430; SKX:       ## %bb.0: ## %entry
431; SKX-NEXT:    vmovaps %xmm0, (%rdi)
432; SKX-NEXT:    vzeroupper
433; SKX-NEXT:    retq
434entry:
435  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
436  %1 = bitcast i8* %addr to <16 x i8>*
437  store <16 x i8> %0, <16 x i8>* %1, align 16
438  ret void
439}
440
; Store the low four doubles (elements 0-3, the lowest 256 bits) of an
; <8 x double> to %addr with align 1. The expected assembly is an unaligned
; vmovups of ymm0.
441define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
442; SKX-LABEL: extract_subvector512_v4f64_store_lo:
443; SKX:       ## %bb.0: ## %entry
444; SKX-NEXT:    vmovups %ymm0, (%rdi)
445; SKX-NEXT:    vzeroupper
446; SKX-NEXT:    retq
447entry:
448  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
449  %1 = bitcast double* %addr to <4 x double>*
450  store <4 x double> %0, <4 x double>* %1, align 1
451  ret void
452}
453
; Store the low four doubles (elements 0-3, the lowest 256 bits) of an
; <8 x double> to %addr with align 16. The expected assembly is still the
; unaligned vmovups of ymm0 - presumably because align 16 is below the 32-byte
; size of the stored vector.
454define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
455; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
456; SKX:       ## %bb.0: ## %entry
457; SKX-NEXT:    vmovups %ymm0, (%rdi)
458; SKX-NEXT:    vzeroupper
459; SKX-NEXT:    retq
460entry:
461  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
462  %1 = bitcast double* %addr to <4 x double>*
463  store <4 x double> %0, <4 x double>* %1, align 16
464  ret void
465}
466
; Store the low four doubles (elements 0-3, the lowest 256 bits) of an
; <8 x double> to %addr with align 32. With the 32-byte alignment the expected
; assembly is the aligned vmovaps of ymm0.
467define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
468; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
469; SKX:       ## %bb.0: ## %entry
470; SKX-NEXT:    vmovaps %ymm0, (%rdi)
471; SKX-NEXT:    vzeroupper
472; SKX-NEXT:    retq
473entry:
474  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
475  %1 = bitcast double* %addr to <4 x double>*
476  store <4 x double> %0, <4 x double>* %1, align 32
477  ret void
478}
479
; Store the low eight floats (elements 0-7, the lowest 256 bits) of a
; <16 x float> to %addr with align 1. The expected assembly is an unaligned
; vmovups of ymm0.
480define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
481; SKX-LABEL: extract_subvector512_v8f32_store_lo:
482; SKX:       ## %bb.0: ## %entry
483; SKX-NEXT:    vmovups %ymm0, (%rdi)
484; SKX-NEXT:    vzeroupper
485; SKX-NEXT:    retq
486entry:
487  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
488  %1 = bitcast float* %addr to <8 x float>*
489  store <8 x float> %0, <8 x float>* %1, align 1
490  ret void
491}
492
; Store the low eight floats (elements 0-7, the lowest 256 bits) of a
; <16 x float> to %addr with align 16. The expected assembly is still the
; unaligned vmovups of ymm0 - presumably because align 16 is below the 32-byte
; size of the stored vector.
493define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
494; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
495; SKX:       ## %bb.0: ## %entry
496; SKX-NEXT:    vmovups %ymm0, (%rdi)
497; SKX-NEXT:    vzeroupper
498; SKX-NEXT:    retq
499entry:
500  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
501  %1 = bitcast float* %addr to <8 x float>*
502  store <8 x float> %0, <8 x float>* %1, align 16
503  ret void
504}
505
; Store the low eight floats (elements 0-7, the lowest 256 bits) of a
; <16 x float> to %addr with align 32. With the 32-byte alignment the expected
; assembly is the aligned vmovaps of ymm0.
506define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
507; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
508; SKX:       ## %bb.0: ## %entry
509; SKX-NEXT:    vmovaps %ymm0, (%rdi)
510; SKX-NEXT:    vzeroupper
511; SKX-NEXT:    retq
512entry:
513  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
514  %1 = bitcast float* %addr to <8 x float>*
515  store <8 x float> %0, <8 x float>* %1, align 32
516  ret void
517}
518
; Store the low four i64 elements (0-3, the lowest 256 bits) of an <8 x i64> to
; %addr with align 1. The expected assembly is an unaligned vmovups of ymm0 even
; though the element type is integer.
519define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
520; SKX-LABEL: extract_subvector512_v4i64_store_lo:
521; SKX:       ## %bb.0: ## %entry
522; SKX-NEXT:    vmovups %ymm0, (%rdi)
523; SKX-NEXT:    vzeroupper
524; SKX-NEXT:    retq
525entry:
526  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
527  %1 = bitcast i64* %addr to <4 x i64>*
528  store <4 x i64> %0, <4 x i64>* %1, align 1
529  ret void
530}
531
; Store the low four i64 elements (0-3, the lowest 256 bits) of an <8 x i64> to
; %addr with align 16. The expected assembly is still the unaligned vmovups of
; ymm0 - presumably because align 16 is below the 32-byte size of the stored
; vector.
532define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
533; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
534; SKX:       ## %bb.0: ## %entry
535; SKX-NEXT:    vmovups %ymm0, (%rdi)
536; SKX-NEXT:    vzeroupper
537; SKX-NEXT:    retq
538entry:
539  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
540  %1 = bitcast i64* %addr to <4 x i64>*
541  store <4 x i64> %0, <4 x i64>* %1, align 16
542  ret void
543}
544
; Store the low four i64 elements (0-3, the lowest 256 bits) of an <8 x i64> to
; %addr with align 32. With the 32-byte alignment the expected assembly is the
; aligned vmovaps of ymm0.
545define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
546; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
547; SKX:       ## %bb.0: ## %entry
548; SKX-NEXT:    vmovaps %ymm0, (%rdi)
549; SKX-NEXT:    vzeroupper
550; SKX-NEXT:    retq
551entry:
552  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
553  %1 = bitcast i64* %addr to <4 x i64>*
554  store <4 x i64> %0, <4 x i64>* %1, align 32
555  ret void
556}
557
; Store the low eight i32 elements (0-7, the lowest 256 bits) of a <16 x i32> to
; %addr with align 1. The expected assembly is an unaligned vmovups of ymm0 even
; though the element type is integer.
558define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
559; SKX-LABEL: extract_subvector512_v8i32_store_lo:
560; SKX:       ## %bb.0: ## %entry
561; SKX-NEXT:    vmovups %ymm0, (%rdi)
562; SKX-NEXT:    vzeroupper
563; SKX-NEXT:    retq
564entry:
565  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
566  %1 = bitcast i32* %addr to <8 x i32>*
567  store <8 x i32> %0, <8 x i32>* %1, align 1
568  ret void
569}
570
; Store the low eight i32 elements (0-7, the lowest 256 bits) of a <16 x i32> to
; %addr with align 16. The expected assembly is still the unaligned vmovups of
; ymm0 - presumably because align 16 is below the 32-byte size of the stored
; vector.
571define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
572; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
573; SKX:       ## %bb.0: ## %entry
574; SKX-NEXT:    vmovups %ymm0, (%rdi)
575; SKX-NEXT:    vzeroupper
576; SKX-NEXT:    retq
577entry:
578  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
579  %1 = bitcast i32* %addr to <8 x i32>*
580  store <8 x i32> %0, <8 x i32>* %1, align 16
581  ret void
582}
583
; Store the low eight i32 elements (0-7, the lowest 256 bits) of a <16 x i32> to
; %addr with align 32. With the 32-byte alignment the expected assembly is the
; aligned vmovaps of ymm0.
584define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
585; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
586; SKX:       ## %bb.0: ## %entry
587; SKX-NEXT:    vmovaps %ymm0, (%rdi)
588; SKX-NEXT:    vzeroupper
589; SKX-NEXT:    retq
590entry:
591  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
592  %1 = bitcast i32* %addr to <8 x i32>*
593  store <8 x i32> %0, <8 x i32>* %1, align 32
594  ret void
595}
596
; Store the low sixteen i16 elements (0-15, the lowest 256 bits) of a
; <32 x i16> to %addr with align 1. The expected assembly is an unaligned
; vmovups of ymm0 even though the element type is integer.
597define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
598; SKX-LABEL: extract_subvector512_v16i16_store_lo:
599; SKX:       ## %bb.0: ## %entry
600; SKX-NEXT:    vmovups %ymm0, (%rdi)
601; SKX-NEXT:    vzeroupper
602; SKX-NEXT:    retq
603entry:
604  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
605  %1 = bitcast i16* %addr to <16 x i16>*
606  store <16 x i16> %0, <16 x i16>* %1, align 1
607  ret void
608}
609
; Store the low sixteen i16 elements (0-15, the lowest 256 bits) of a
; <32 x i16> to %addr with align 16. The expected assembly is still the
; unaligned vmovups of ymm0 - presumably because align 16 is below the 32-byte
; size of the stored vector.
610define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
611; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
612; SKX:       ## %bb.0: ## %entry
613; SKX-NEXT:    vmovups %ymm0, (%rdi)
614; SKX-NEXT:    vzeroupper
615; SKX-NEXT:    retq
616entry:
617  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
618  %1 = bitcast i16* %addr to <16 x i16>*
619  store <16 x i16> %0, <16 x i16>* %1, align 16
620  ret void
621}
622
; Store the low sixteen i16 elements (0-15, the lowest 256 bits) of a
; <32 x i16> to %addr with align 32. With the 32-byte alignment the expected
; assembly is the aligned vmovaps of ymm0.
623define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
624; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
625; SKX:       ## %bb.0: ## %entry
626; SKX-NEXT:    vmovaps %ymm0, (%rdi)
627; SKX-NEXT:    vzeroupper
628; SKX-NEXT:    retq
629entry:
630  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
631  %1 = bitcast i16* %addr to <16 x i16>*
632  store <16 x i16> %0, <16 x i16>* %1, align 32
633  ret void
634}
635
; Store the low thirty-two i8 elements (0-31, the lowest 256 bits) of a
; <64 x i8> to %addr with align 1. The expected assembly is an unaligned vmovups
; of ymm0 even though the element type is integer.
636define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
637; SKX-LABEL: extract_subvector512_v32i8_store_lo:
638; SKX:       ## %bb.0: ## %entry
639; SKX-NEXT:    vmovups %ymm0, (%rdi)
640; SKX-NEXT:    vzeroupper
641; SKX-NEXT:    retq
642entry:
643  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
644  %1 = bitcast i8* %addr to <32 x i8>*
645  store <32 x i8> %0, <32 x i8>* %1, align 1
646  ret void
647}
648
; Store the low thirty-two i8 elements (0-31, the lowest 256 bits) of a
; <64 x i8> to %addr with align 16. The expected assembly is still the unaligned
; vmovups of ymm0 - presumably because align 16 is below the 32-byte size of the
; stored vector.
649define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
650; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
651; SKX:       ## %bb.0: ## %entry
652; SKX-NEXT:    vmovups %ymm0, (%rdi)
653; SKX-NEXT:    vzeroupper
654; SKX-NEXT:    retq
655entry:
656  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
657  %1 = bitcast i8* %addr to <32 x i8>*
658  store <32 x i8> %0, <32 x i8>* %1, align 16
659  ret void
660}
661
; Store the low thirty-two i8 elements (0-31, the lowest 256 bits) of a
; <64 x i8> to %addr with align 32. With the 32-byte alignment the expected
; assembly is the aligned vmovaps of ymm0.
662define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
663; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
664; SKX:       ## %bb.0: ## %entry
665; SKX-NEXT:    vmovaps %ymm0, (%rdi)
666; SKX-NEXT:    vzeroupper
667; SKX-NEXT:    retq
668entry:
669  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
670  %1 = bitcast i8* %addr to <32 x i8>*
671  store <32 x i8> %0, <32 x i8>* %1, align 32
672  ret void
673}
674
; Merge-masked extract: take the upper four doubles (elements 4-7) of %__A and,
; per element, select between them and the pass-through %__W using the low four
; bits of the i8 mask %__U. The expected assembly is a kmovd of the mask
; followed by one merge-masked vextractf64x4 $1 into ymm0.
675define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
676; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
677; SKX:       ## %bb.0: ## %entry
678; SKX-NEXT:    kmovd %edi, %k1
679; SKX-NEXT:    vextractf64x4 $1, %zmm1, %ymm0 {%k1}
680; SKX-NEXT:    retq
681entry:
682  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
683  %0 = bitcast i8 %__U to <8 x i1>
684  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
685  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
686  ret <4 x double> %1
687}
688
; Zero-masked extract: take the upper four doubles (elements 4-7) of %__A and,
; per element, select between them and zero using the low four bits of the i8
; mask %__U. The expected assembly is a kmovd of the mask followed by one
; zero-masked ({z}) vextractf64x4 $1.
689define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
690; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
691; SKX:       ## %bb.0: ## %entry
692; SKX-NEXT:    kmovd %edi, %k1
693; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
694; SKX-NEXT:    retq
695entry:
696  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
697  %0 = bitcast i8 %__U to <8 x i1>
698  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
699  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
700  ret <4 x double> %1
701}
702
; Merge-masked extract: %__A is first bitcast from <8 x double> to <16 x float>,
; then floats 4-7 (128-bit lane 1) are selected against the pass-through %__W
; using the low four bits of the i8 mask %__U. The expected assembly is a kmovd
; of the mask followed by one merge-masked vextractf32x4 $1 into xmm0.
703define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
704; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
705; SKX:       ## %bb.0: ## %entry
706; SKX-NEXT:    kmovd %edi, %k1
707; SKX-NEXT:    vextractf32x4 $1, %zmm1, %xmm0 {%k1}
708; SKX-NEXT:    vzeroupper
709; SKX-NEXT:    retq
710entry:
711  %0 = bitcast <8 x double> %__A to <16 x float>
712  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
713  %1 = bitcast i8 %__U to <8 x i1>
714  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
715  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
716  ret <4 x float> %2
717}
718
; Zero-masked extract: %__A is first bitcast from <8 x double> to <16 x float>,
; then floats 4-7 (128-bit lane 1) are selected against zero using the low four
; bits of the i8 mask %__U. The expected assembly is a kmovd of the mask
; followed by one zero-masked ({z}) vextractf32x4 $1.
719define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
720; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
721; SKX:       ## %bb.0: ## %entry
722; SKX-NEXT:    kmovd %edi, %k1
723; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
724; SKX-NEXT:    vzeroupper
725; SKX-NEXT:    retq
726entry:
727  %0 = bitcast <8 x double> %__A to <16 x float>
728  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
729  %1 = bitcast i8 %__U to <8 x i1>
730  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
731  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
732  ret <4 x float> %2
733}
734
; Merge-masked extraction of double elements 2-3 (the upper 128-bit half) of a
; 256-bit vector. Elements 0-1 of the <8 x i1> form of %__U choose, per element,
; between the extracted value and the pass-through %__W. The expected output is
; a single masked vextractf64x2 with immediate 1.
define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
  ret <2 x double> %1
}
749
; Zero-masked extraction of double elements 2-3 (the upper 128-bit half) of a
; 256-bit vector. Elements 0-1 of the <8 x i1> form of %__U choose, per element,
; between the extracted value and zero. The expected output is a single
; zero-masked vextractf64x2 with immediate 1.
define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
  ret <2 x double> %1
}
764
; Merge-masked extraction of i64 elements 2-3 (the upper 128-bit half) of a
; 256-bit integer vector. Elements 0-1 of the <8 x i1> form of %__U choose, per
; element, between the extracted value and the pass-through %__W. The expected
; output is a single masked vextracti64x2 with immediate 1.
define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti64x2 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
  ret <2 x i64> %1
}
779
; Zero-masked extraction of i64 elements 2-3 (the upper 128-bit half) of a
; 256-bit integer vector. Elements 0-1 of the <8 x i1> form of %__U choose, per
; element, between the extracted value and zero. The expected output is a
; single zero-masked vextracti64x2 with immediate 1.
define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
  ret <2 x i64> %1
}
794
; Merge-masked extraction of float elements 4-7 (the upper 128-bit half) of a
; 256-bit vector. Elements 0-3 of the <8 x i1> form of %__U choose, per element,
; between the extracted value and the pass-through %__W. The expected output is
; a single masked vextractf32x4 with immediate 1.
define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
  ret <4 x float> %1
}
809
; Zero-masked extraction of float elements 4-7 (the upper 128-bit half) of a
; 256-bit vector. Elements 0-3 of the <8 x i1> form of %__U choose, per element,
; between the extracted value and zero. The expected output is a single
; zero-masked vextractf32x4 with immediate 1.
define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %1
}
824
; Merge-masked extraction of i32 elements 4-7 (the upper 128-bit half) of a
; 256-bit integer vector. The source and the pass-through %__W arrive as i64
; vectors and are bitcast to i32 elements, so the select works at 32-bit
; granularity; the result is bitcast back to <2 x i64>. Elements 0-3 of the
; <8 x i1> form of %__U drive the select. The expected output is a single
; masked vextracti32x4 with immediate 1.
define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti32x4 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast <2 x i64> %__W to <4 x i32>
  %2 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}
842
; Zero-masked extraction of i32 elements 4-7 (the upper 128-bit half) of a
; 256-bit integer vector. The source arrives as <4 x i64> and is bitcast to i32
; elements, so the select works at 32-bit granularity; the result is bitcast
; back to <2 x i64>. Elements 0-3 of the <8 x i1> form of %__U choose between
; the extracted value and zero. The expected output is a single zero-masked
; vextracti32x4 with immediate 1.
define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}
859
; Merge-masked extraction of float elements 8-15 (the upper 256-bit half) of a
; 512-bit vector. All eight elements of the <8 x i1> form of %__U are used, so
; no mask sub-vector shuffle is needed; cleared elements keep the pass-through
; %__W. The expected output is a single masked vextractf32x8 with immediate 1.
define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x8 $1, %zmm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = bitcast i8 %__U to <8 x i1>
  %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W
  ret <8 x float> %1
}
872
; Zero-masked extraction of float elements 8-15 (the upper 256-bit half) of a
; 512-bit vector. All eight elements of the <8 x i1> form of %__U are used, so
; no mask sub-vector shuffle is needed; cleared elements become zero. The
; expected output is a single zero-masked vextractf32x8 with immediate 1.
define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = bitcast i8 %__U to <8 x i1>
  %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer
  ret <8 x float> %1
}
885
; Merge-masked extraction of double elements 6-7 (the last 128-bit lane) of a
; 512-bit vector. Elements 0-1 of the <8 x i1> form of %__U choose, per element,
; between the extracted value and the pass-through %__W. The expected output is
; a single masked vextractf64x2 with immediate 3.
define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $3, %zmm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
  ret <2 x double> %1
}
900
; Zero-masked extraction of double elements 6-7 (the last 128-bit lane) of a
; 512-bit vector. Elements 0-1 of the <8 x i1> form of %__U choose, per element,
; between the extracted value and zero. The expected output is a single
; zero-masked vextractf64x2 with immediate 3.
define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
  ret <2 x double> %1
}
915