; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512dq -mattr=+avx512vl| FileCheck %s

; Widen a <2 x i1> mask to <8 x i1>, placing the two elements of %a in
; lanes 2-3 of the result; every other lane is undef.
; Expected lowering: the input is converted to a mask register, shifted left
; by 2 bits, and converted back to a vector.
define <8 x i1> @test(<2 x i1> %a) {
; CHECK-LABEL: test:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Widen a <2 x i1> mask to <8 x i1>, placing the two elements of %a in
; lanes 4-5 of the result; every other lane is undef.
; Expected lowering: same sequence as @test, but the mask register is
; shifted left by 4 bits.
define <8 x i1> @test1(<2 x i1> %a) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Shuffle a <2 x i1> mask with an all-zero second operand into <8 x i1>:
; lanes 0-1 select index 3 (an element of the zeroinitializer operand),
; lanes 4-5 take the two elements of %a, and the remaining lanes are undef.
; Expected lowering: the mask is expanded to a 512-bit vector, combined with
; a zeroed register via vshufi64x2, and then converted back to a mask.
define <8 x i1> @test2(<2 x i1> %a) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpmovm2q %k0, %zmm0
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[0,1,0,1]
; CHECK-NEXT:    vpsllq $63, %zmm0, %zmm0
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Concatenate a <4 x i1> mask with an all-zero <4 x i1>: lanes 0-3 come from
; %a and lanes 4-7 come from the zeroinitializer operand.
; Expected lowering: a left shift followed by a right shift of the mask
; register by 4 bits, which clears the upper four lanes.
define <8 x i1> @test3(<4 x i1> %a) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i1> %res
}

; Concatenate two <4 x i1> masks into <8 x i1>: lanes 0-3 come from %a and
; lanes 4-7 come from %b.
; Expected lowering: %b's mask is shifted left by 4, the upper bits of %a's
; mask are cleared with a shift-left/shift-right pair, and the two are ORed.
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $4, %k1, %k1
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i1> %res
}

; Concatenate two <2 x i1> masks into <4 x i1>: lanes 0-1 come from %a and
; lanes 2-3 come from %b.
; Expected lowering: same shift-and-OR pattern as @test4, with a shift
; amount of 2 and a dword-element conversion back to a vector.
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $2, %k1, %k1
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    kshiftrb $2, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2d %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i1> %res
}

; Concatenate two <2 x i1> masks into the low four lanes of a <16 x i1>
; result (lanes 0-1 from %a, lanes 2-3 from %b); lanes 4-15 are undef.
; Expected lowering: the 2+2 concatenation from @test5, followed by a
; kunpckbw to widen the mask to 16 bits.
define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $2, %k1, %k1
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    kshiftrb $2, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    kunpckbw %k0, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i1> %res
}

; Concatenate two <4 x i1> masks into the low eight lanes of a <32 x i1>
; result (lanes 0-3 from %a, lanes 4-7 from %b); lanes 8-31 are undef.
; Expected lowering: the 4+4 concatenation from @test4, followed by
; kunpckbw and kunpckwd to widen the mask to 32 bits.
define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $4, %k1, %k1
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    kunpckbw %k0, %k0, %k0
; CHECK-NEXT:    kunpckwd %k0, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i1> %res
}

; Build a <64 x i1> result from two <8 x i1> masks: lanes 0-7 come from %a,
; lanes 32-39 come from %b, and every other lane is undef.
; Expected lowering: both inputs are converted to mask registers and joined
; with a single kunpckdq.
define <64 x i1> @test8(<8 x i1> %a, <8 x i1>%b) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
; CHECK-NEXT:    vpmovw2m %xmm1, %k0
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k1
; CHECK-NEXT:    kunpckdq %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %zmm0
; CHECK-NEXT:    retq

  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <64 x i1> %res
}

; Extract the upper half of an <8 x i1> mask: the result's four lanes are
; lanes 4-7 of %a. The shuffle mask never selects from %b.
; Expected lowering: %a is converted to a mask register and shifted right
; by 4 bits.
define <4 x i1> @test9(<8 x i1> %a, <8 x i1> %b) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k0
; CHECK-NEXT:    kshiftrw $4, %k0, %k0
; CHECK-NEXT:    vpmovm2d %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i1> %res
}

; Extract the upper half of a <4 x i1> mask: the result's two lanes are
; lanes 2-3 of %a. The shuffle mask never selects from %b.
; Expected lowering: %a is converted to a mask register and shifted right
; by 2 bits.
define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftrw $2, %k0, %k0
; CHECK-NEXT:    vpmovm2q %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3>
  ret <2 x i1> %res
}
171