; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

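; Tests of AVX512_VBMI2 512-bit expand/compress and funnel-shift
; (VPSHLD/VPSHRD) lowering on 32-bit and 64-bit targets, with instruction
; encodings checked via --show-mc-encoding.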
define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %data)
  ret <32 x i16> %3
}

define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> zeroinitializer)
  ret <32 x i16> %3
}

define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> %data)
  ret <32 x i16> %2
}

define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> %data)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> zeroinitializer)
  ret <64 x i8> %2
}

define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i8> %data)
  ret <64 x i8> %1
}

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

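; VPCOMPRESSW/VPCOMPRESSB: compress stores (masked and all-ones mask) and the
; register forms with merge- and zero-masking, lowered from
; llvm.masked.compressstore.* and llvm.x86.avx512.mask.compress.*.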
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1)
  ret void
}

define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

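; VPSHLD/VPSHRD: immediate-count concatenate-and-shift, lowered from
; llvm.fshl.*/llvm.fshr.* with splat constant shift amounts. Note the swapped
; data-operand order in the fshr calls relative to fshl.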
define <16 x i32> @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc1,0x17]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc1,0x17]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

define <8 x i64> @test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc1,0x17]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc1,0x17]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}

define <32 x i16> @test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc1,0x07]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc1,0x07]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

define <16 x i32> @test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc1,0x17]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc1,0x17]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

define <8 x i64> @test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc1,0x17]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc1,0x17]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}

define <32 x i16> @test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc1,0x07]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc1,0x07]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

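; VPSHLDV/VPSHRDV: variable-count funnel shifts with merge- and zero-masking;
; the memory forms take the per-element shift counts from memory.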
define <16 x i32> @test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

define <8 x i64> @test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %res3 = add <8 x i64> %3, %6
  ret <8 x i64> %res3
}

define <32 x i16> @test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %res3 = add <32 x i16> %3, %6
  ret <32 x i16> %res3
}

define <16 x i32> @test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %6
  ret <16 x i32> %res3
}

define <8 x i64> @test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
  %res3 = add <8 x i64> %3, %6
  ret <8 x i64> %res3
}

define <32 x i16> @test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %res3 = add <32 x i16> %3, %6
  ret <32 x i16> %res3
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)