1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
; Mask-register add intrinsics. The <32 x i1>/<64 x i1> arguments are
; materialized into k-registers with vptestm{w,b}, combined with kadd{d,q},
; and the result is compared against zero (kortest + sete).
define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind {
; CHECK-LABEL: test_int_x86_avx512_kadd_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT:    kaddd %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfd,0x4a,0xc1]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestd %k0, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc0]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
entry:
  %0 = icmp ne <32 x i16> %A, zeroinitializer
  %1 = icmp ne <32 x i16> %B, zeroinitializer
  %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1)
  %3 = bitcast <32 x i1> %2 to i32
  %4 = icmp eq i32 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>)

; kadd.q: the 64-bit mask compare diverges per target, so X86/X64 get separate
; check blocks — 32-bit X86 splits the i64 test into kshiftrq $32 + kortestd,
; while X64 uses kortestq directly.
define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind {
; X86-LABEL: test_int_x86_avx512_kadd_q:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; X86-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; X86-NEXT:    kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; X86-NEXT:    kortestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc1]
; X86-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_kadd_q:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; X64-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; X64-NEXT:    kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
; X64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; X64-NEXT:    kortestq %k0, %k0 # encoding: [0xc4,0xe1,0xf8,0x98,0xc0]
; X64-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = icmp ne <64 x i8> %A, zeroinitializer
  %1 = icmp ne <64 x i8> %B, zeroinitializer
  %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1)
  %3 = bitcast <64 x i1> %2 to i64
  %4 = icmp eq i64 %3, 0
  %5 = zext i1 %4 to i32
  ret i32 %5
}
declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
60
; ktest{c,z}.{d,q} intrinsics: both mask operands are fed to a single ktest
; instruction; ktestc reads the carry flag (setb) and ktestz reads the zero
; flag (sete). Codegen is identical for X86 and X64, so plain CHECK is used.
define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <32 x i16> %A, zeroinitializer
  %2 = icmp ne <32 x i16> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <32 x i16> %A, zeroinitializer
  %2 = icmp ne <32 x i16> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
; CHECK-LABEL: test_x86_avx512_ktestc_q:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <64 x i8> %A, zeroinitializer
  %2 = icmp ne <64 x i8> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone

define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
; CHECK-LABEL: test_x86_avx512_ktestz_q:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = icmp ne <64 x i8> %A, zeroinitializer
  %2 = icmp ne <64 x i8> %B, zeroinitializer
  %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone
128
; vpackssdw (<16 x i32> x2 -> <32 x i16>, signed saturation) via
; @llvm.x86.avx512.packssdw.512. Suffix legend for the variants below:
;   rr  = reg,reg            rm  = second operand loaded from memory
;   rmb = {1to16} broadcast of a scalar i32 (splat built with
;         insertelement + shufflevector)
;   ...k  = merge-masking (select mask ? result : %passThru)
;   ...kz = zero-masking  (select mask ? result : zeroinitializer)
; On X86 the i32 mask arrives on the stack (kmovd from %esp); on X64 it is in
; a GPR.
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
299
; vpacksswb (<32 x i16> x2 -> <64 x i8>, signed saturation) via
; @llvm.x86.avx512.packsswb.512. Same rr/rrk/rrkz/rm/rmk/rmkz variant scheme
; as the epi32 tests above, but the mask is i64 (kmovq; on X86 it is loaded
; from the stack since there is no 64-bit GPR). No broadcast (rmb) variants:
; vpacksswb has no embedded-broadcast form for the i16 element type.
define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
406
407
; vpackusdw (<16 x i32> x2 -> <32 x i16>, unsigned saturation) via
; @llvm.x86.avx512.packusdw.512. Mirrors the packssdw variant set above
; (rr/rrk/rrkz/rm/rmk/rmkz/rmb/rmbk/rmbkz) with the unsigned-saturating
; opcode (0x2b, map 0f38 — note the 0x62,0xf2 prefix vs 0x62,0xf1 for
; vpackssdw).
define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
578
; vpackuswb (<32 x i16> x2 -> <64 x i8>, unsigned saturation) via
; @llvm.x86.avx512.packuswb.512, with the same i64-mask variant scheme as the
; packsswb tests above.
define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i64 %mask to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
  ret <64 x i8> %3
}

define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_512:
; X64:       # %bb.0:
; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
  ret <64 x i8> %1
}
641
642define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
643; X86-LABEL: test_mask_packus_epi16_rmk_512:
644; X86:       # %bb.0:
645; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
646; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
647; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
648; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
649; X86-NEXT:    retl # encoding: [0xc3]
650;
651; X64-LABEL: test_mask_packus_epi16_rmk_512:
652; X64:       # %bb.0:
653; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
654; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
655; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
656; X64-NEXT:    retq # encoding: [0xc3]
657  %b = load <32 x i16>, <32 x i16>* %ptr_b
658  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
659  %2 = bitcast i64 %mask to <64 x i1>
660  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
661  ret <64 x i8> %3
662}
663
664define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
665; X86-LABEL: test_mask_packus_epi16_rmkz_512:
666; X86:       # %bb.0:
667; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
668; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
669; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
670; X86-NEXT:    retl # encoding: [0xc3]
671;
672; X64-LABEL: test_mask_packus_epi16_rmkz_512:
673; X64:       # %bb.0:
674; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
675; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
676; X64-NEXT:    retq # encoding: [0xc3]
677  %b = load <32 x i16>, <32 x i16>* %ptr_b
678  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
679  %2 = bitcast i64 %mask to <64 x i1>
680  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
681  ret <64 x i8> %3
682}
683
684declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
685
; ---------------------------------------------------------------------------
; llvm.x86.avx512.vpermi2var.hi.512: two-source word permute. The same
; intrinsic lowers to either vpermi2w or vpermt2w depending on which operand
; the register allocator keeps as the destination/index register.
; ---------------------------------------------------------------------------

; Unmasked; operands swapped so the index vector is %x0 (vpermt2var pattern).
686define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
687; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_512:
688; CHECK:       # %bb.0:
689; CHECK-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x75,0xc2]
690; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
691  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
692  ret <32 x i16> %1
693}
694
; Merge-masked vpermt2var: passthrough is %x1 (the first data source).
695define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
696; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
697; X86:       # %bb.0:
698; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
699; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
700; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
701; X86-NEXT:    retl # encoding: [0xc3]
702;
703; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
704; X64:       # %bb.0:
705; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
706; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
707; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
708; X64-NEXT:    retq # encoding: [0xc3]
709  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
710  %2 = bitcast i32 %x3 to <32 x i1>
711  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
712  ret <32 x i16> %3
713}
714
; Zero-masked vpermt2var.
715define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
716; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
717; X86:       # %bb.0:
718; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
719; X86-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
720; X86-NEXT:    retl # encoding: [0xc3]
721;
722; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
723; X64:       # %bb.0:
724; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
725; X64-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
726; X64-NEXT:    retq # encoding: [0xc3]
727  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
728  %2 = bitcast i32 %x3 to <32 x i1>
729  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
730  ret <32 x i16> %3
731}
732
733declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)
734
; Unmasked vpermi2var: %x1 is the index operand, lowered as vpermt2w.
735define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
736; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_512:
737; CHECK:       # %bb.0:
738; CHECK-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xc2]
739; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
740  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
741  ret <32 x i16> %1
742}
743
; Merge-masked vpermi2var: passthrough is %x1 (the index vector).
744define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
745; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
746; X86:       # %bb.0:
747; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
748; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
749; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
750; X86-NEXT:    retl # encoding: [0xc3]
751;
752; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
753; X64:       # %bb.0:
754; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
755; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
756; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
757; X64-NEXT:    retq # encoding: [0xc3]
758  %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
759  %2 = bitcast i32 %x3 to <32 x i1>
760  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
761  ret <32 x i16> %3
762}
763
; ---------------------------------------------------------------------------
; llvm.x86.avx512.pavg.{b,w}.512: byte/word rounding average (vpavgb/vpavgw),
; unmasked and merge-masked.
; ---------------------------------------------------------------------------
764declare <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8>, <64 x i8>)
765
; Unmasked vpavgb.
766define <64 x i8> @test_int_x86_avx512_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
767; CHECK-LABEL: test_int_x86_avx512_pavg_b_512:
768; CHECK:       # %bb.0:
769; CHECK-NEXT:    vpavgb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xc1]
770; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
771  %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1)
772  ret <64 x i8> %1
773}
774
; Merge-masked vpavgb: passthrough %x2, i64 mask (one bit per byte lane).
775define <64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
776; X86-LABEL: test_int_x86_avx512_mask_pavg_b_512:
777; X86:       # %bb.0:
778; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
779; X86-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
780; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
781; X86-NEXT:    retl # encoding: [0xc3]
782;
783; X64-LABEL: test_int_x86_avx512_mask_pavg_b_512:
784; X64:       # %bb.0:
785; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
786; X64-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
787; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
788; X64-NEXT:    retq # encoding: [0xc3]
789  %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1)
790  %2 = bitcast i64 %x3 to <64 x i1>
791  %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2
792  ret <64 x i8> %3
793}
794
795declare <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16>, <32 x i16>)
796
; Unmasked vpavgw.
797define <32 x i16> @test_int_x86_avx512_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
798; CHECK-LABEL: test_int_x86_avx512_pavg_w_512:
799; CHECK:       # %bb.0:
800; CHECK-NEXT:    vpavgw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xc1]
801; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
802  %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1)
803  ret <32 x i16> %1
804}
805
; Merge-masked vpavgw: i32 mask (one bit per word lane).
806define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
807; X86-LABEL: test_int_x86_avx512_mask_pavg_w_512:
808; X86:       # %bb.0:
809; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
810; X86-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
811; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
812; X86-NEXT:    retl # encoding: [0xc3]
813;
814; X64-LABEL: test_int_x86_avx512_mask_pavg_w_512:
815; X64:       # %bb.0:
816; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
817; X64-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
818; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
819; X64-NEXT:    retq # encoding: [0xc3]
820  %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1)
821  %2 = bitcast i32 %x3 to <32 x i1>
822  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
823  ret <32 x i16> %3
824}
825
; ---------------------------------------------------------------------------
; llvm.x86.avx512.pshuf.b.512: byte shuffle (vpshufb), unmasked,
; merge-masked, and zero-masked.
; ---------------------------------------------------------------------------
826declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
827
; Unmasked vpshufb.
828define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
829; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
830; CHECK:       # %bb.0:
831; CHECK-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
832; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
833  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
834  ret <64 x i8> %res
835}
836
; Merge-masked vpshufb: unselected lanes keep %x2.
837define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
838; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
839; X86:       # %bb.0:
840; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
841; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
842; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
843; X86-NEXT:    retl # encoding: [0xc3]
844;
845; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
846; X64:       # %bb.0:
847; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
848; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
849; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
850; X64-NEXT:    retq # encoding: [0xc3]
851  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
852  %mask.cast = bitcast i64 %mask to <64 x i1>
853  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
854  ret <64 x i8> %res2
855}
856
; Zero-masked vpshufb.
857define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
858; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
859; X86:       # %bb.0:
860; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
861; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
862; X86-NEXT:    retl # encoding: [0xc3]
863;
864; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
865; X64:       # %bb.0:
866; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
867; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
868; X64-NEXT:    retq # encoding: [0xc3]
869  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
870  %mask.cast = bitcast i64 %mask to <64 x i1>
871  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
872  ret <64 x i8> %res2
873}
874
; ---------------------------------------------------------------------------
; Word high-multiply family: pmulhu.w (vpmulhuw, unsigned high halves),
; pmulh.w (vpmulhw, signed high halves) and pmul.hr.sw (vpmulhrsw,
; rounded-scaled). Each tested unmasked and merge-masked.
; ---------------------------------------------------------------------------
875declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)
876
; Unmasked vpmulhuw.
877define <32 x i16> @test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
878; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_512:
879; CHECK:       # %bb.0:
880; CHECK-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xc1]
881; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
882  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
883  ret <32 x i16> %1
884}
885
; Merge-masked vpmulhuw.
886define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
887; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
888; X86:       # %bb.0:
889; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
890; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
891; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
892; X86-NEXT:    retl # encoding: [0xc3]
893;
894; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
895; X64:       # %bb.0:
896; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
897; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
898; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
899; X64-NEXT:    retq # encoding: [0xc3]
900  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
901  %2 = bitcast i32 %x3 to <32 x i1>
902  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
903  ret <32 x i16> %3
904}
905
906declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>)
907
; Unmasked vpmulhw.
908define <32 x i16> @test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
909; CHECK-LABEL: test_int_x86_avx512_pmulh_w_512:
910; CHECK:       # %bb.0:
911; CHECK-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xc1]
912; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
913  %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
914  ret <32 x i16> %1
915}
916
; Merge-masked vpmulhw.
917define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
918; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
919; X86:       # %bb.0:
920; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
921; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
922; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
923; X86-NEXT:    retl # encoding: [0xc3]
924;
925; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
926; X64:       # %bb.0:
927; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
928; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
929; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
930; X64-NEXT:    retq # encoding: [0xc3]
931  %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
932  %2 = bitcast i32 %x3 to <32 x i1>
933  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
934  ret <32 x i16> %3
935}
936
937declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>)
938
; Unmasked vpmulhrsw.
939define <32 x i16> @test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
940; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_512:
941; CHECK:       # %bb.0:
942; CHECK-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xc1]
943; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
944  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
945  ret <32 x i16> %1
946}
947
; Merge-masked vpmulhrsw.
948define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
949; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
950; X86:       # %bb.0:
951; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
952; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
953; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
954; X86-NEXT:    retl # encoding: [0xc3]
955;
956; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
957; X64:       # %bb.0:
958; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
959; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
960; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
961; X64-NEXT:    retq # encoding: [0xc3]
962  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
963  %2 = bitcast i32 %x3 to <32 x i1>
964  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
965  ret <32 x i16> %3
966}
967
; ---------------------------------------------------------------------------
; vpmovwb: plain truncation <32 x i16> -> <32 x i8>, expressed as an IR
; trunc (register forms) or via the mask.pmov.wb.mem intrinsic (store form).
; ---------------------------------------------------------------------------

; Unmasked truncation (zmm -> ymm).
968define <32 x i8>@test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0) {
969; CHECK-LABEL: test_int_x86_avx512_pmov_wb_512:
970; CHECK:       # %bb.0:
971; CHECK-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
972; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
973  %1 = trunc <32 x i16> %x0 to <32 x i8>
974  ret <32 x i8> %1
975}
976
; Merge-masked truncation: unselected byte lanes keep %x1.
977define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
978; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
979; X86:       # %bb.0:
980; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
981; X86-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
982; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
983; X86-NEXT:    retl # encoding: [0xc3]
984;
985; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
986; X64:       # %bb.0:
987; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
988; X64-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
989; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
990; X64-NEXT:    retq # encoding: [0xc3]
991  %1 = trunc <32 x i16> %x0 to <32 x i8>
992  %2 = bitcast i32 %x2 to <32 x i1>
993  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1
994  ret <32 x i8> %3
995}
996
; Zero-masked truncation.
997define <32 x i8>@test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) {
998; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
999; X86:       # %bb.0:
1000; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1001; X86-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
1002; X86-NEXT:    retl # encoding: [0xc3]
1003;
1004; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
1005; X64:       # %bb.0:
1006; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1007; X64-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
1008; X64-NEXT:    retq # encoding: [0xc3]
1009  %1 = trunc <32 x i16> %x0 to <32 x i8>
1010  %2 = bitcast i32 %x2 to <32 x i1>
1011  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
1012  ret <32 x i8> %3
1013}
1014
1015declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1016
; Truncating store: all-ones mask (-1) emits the unmasked store, %x2 the masked one.
1017define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1018; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1019; X86:       # %bb.0:
1020; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1021; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1022; X86-NEXT:    vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00]
1023; X86-NEXT:    vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00]
1024; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1025; X86-NEXT:    retl # encoding: [0xc3]
1026;
1027; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1028; X64:       # %bb.0:
1029; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1030; X64-NEXT:    vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07]
1031; X64-NEXT:    vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07]
1032; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1033; X64-NEXT:    retq # encoding: [0xc3]
1034    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1035    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1036    ret void
1037}
1038
; ---------------------------------------------------------------------------
; vpmovswb: signed-saturating truncate <32 x i16> -> <32 x i8>, via the
; mask.pmovs.wb intrinsics (register and store forms). A -1 mask is the
; unmasked form; zeroinitializer passthrough is the zero-masked form.
; ---------------------------------------------------------------------------
1039declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
1040
; Unmasked (mask = -1).
1041define <32 x i8>@test_int_x86_avx512_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1) {
1042; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_512:
1043; CHECK:       # %bb.0:
1044; CHECK-NEXT:    vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
1045; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1046  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1047  ret <32 x i8> %res
1048}
1049
; Merge-masked: passthrough %x1.
1050define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1051; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1052; X86:       # %bb.0:
1053; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1054; X86-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
1055; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
1056; X86-NEXT:    retl # encoding: [0xc3]
1057;
1058; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1059; X64:       # %bb.0:
1060; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1061; X64-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
1062; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
1063; X64-NEXT:    retq # encoding: [0xc3]
1064  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1065  ret <32 x i8> %res
1066}
1067
; Zero-masked: zeroinitializer passthrough lowers to the {z} form.
1068define <32 x i8>@test_int_x86_avx512_maskz_pmovs_wb_512(<32 x i16> %x0, i32 %x2) {
1069; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_512:
1070; X86:       # %bb.0:
1071; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1072; X86-NEXT:    vpmovswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc0]
1073; X86-NEXT:    retl # encoding: [0xc3]
1074;
1075; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_512:
1076; X64:       # %bb.0:
1077; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1078; X64-NEXT:    vpmovswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc0]
1079; X64-NEXT:    retq # encoding: [0xc3]
1080  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1081  ret <32 x i8> %res
1082}
1083
1084declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1085
; Saturating truncating store: unmasked (-1) then masked (%x2).
1086define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1087; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1088; X86:       # %bb.0:
1089; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1090; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1091; X86-NEXT:    vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00]
1092; X86-NEXT:    vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00]
1093; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1094; X86-NEXT:    retl # encoding: [0xc3]
1095;
1096; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1097; X64:       # %bb.0:
1098; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1099; X64-NEXT:    vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07]
1100; X64-NEXT:    vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07]
1101; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1102; X64-NEXT:    retq # encoding: [0xc3]
1103    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1104    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1105    ret void
1106}
1107
; ---------------------------------------------------------------------------
; vpmovuswb: unsigned-saturating truncate <32 x i16> -> <32 x i8>, via the
; mask.pmovus.wb intrinsics; same unmasked/merge/zero/store matrix as the
; pmovs tests above.
; ---------------------------------------------------------------------------
1108declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
1109
; Unmasked (mask = -1).
1110define <32 x i8>@test_int_x86_avx512_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1) {
1111; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_512:
1112; CHECK:       # %bb.0:
1113; CHECK-NEXT:    vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
1114; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1115  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1116  ret <32 x i8> %res
1117}
1118
; Merge-masked: passthrough %x1.
1119define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1120; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1121; X86:       # %bb.0:
1122; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1123; X86-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
1124; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
1125; X86-NEXT:    retl # encoding: [0xc3]
1126;
1127; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1128; X64:       # %bb.0:
1129; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1130; X64-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
1131; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
1132; X64-NEXT:    retq # encoding: [0xc3]
1133  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1134  ret <32 x i8> %res
1135}
1136
; Zero-masked: zeroinitializer passthrough lowers to the {z} form.
1137define <32 x i8>@test_int_x86_avx512_maskz_pmovus_wb_512(<32 x i16> %x0, i32 %x2) {
1138; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_512:
1139; X86:       # %bb.0:
1140; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1141; X86-NEXT:    vpmovuswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc0]
1142; X86-NEXT:    retl # encoding: [0xc3]
1143;
1144; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_512:
1145; X64:       # %bb.0:
1146; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1147; X64-NEXT:    vpmovuswb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc0]
1148; X64-NEXT:    retq # encoding: [0xc3]
1149  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1150  ret <32 x i8> %res
1151}
1152
1153declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1154
; Unsigned-saturating truncating store: unmasked (-1) then masked (%x2).
1155define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1156; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1157; X86:       # %bb.0:
1158; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1159; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1160; X86-NEXT:    vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00]
1161; X86-NEXT:    vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00]
1162; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1163; X86-NEXT:    retl # encoding: [0xc3]
1164;
1165; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1166; X64:       # %bb.0:
1167; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1168; X64-NEXT:    vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07]
1169; X64-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07]
1170; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1171; X64-NEXT:    retq # encoding: [0xc3]
1172    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1173    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1174    ret void
1175}
1176
1177declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
1178
1179define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1) {
1180; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_512:
1181; CHECK:       # %bb.0:
1182; CHECK-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xc1]
1183; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1184  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
1185  ret <32 x i16> %1
1186}
1187
define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; Masked variant: the bitcast-i32-mask + select should fold into a {%k1}-merged
; vpmaddubsw with %x2 (zmm2) as the passthrough operand.
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
1207
1208declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
1209
define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1) {
; Unmasked pmaddw.d.512 intrinsic: expected to lower to a single vpmaddwd.
; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <16 x i32> %1
}
1218
define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; Masked variant: i16 mask (16 dword lanes) bitcast + select folds into a
; {%k1}-merged vpmaddwd with %x2 as passthrough.
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}
1238
1239declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)
1240
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; Exercises vdbpsadbw in all three mask forms in one function: merge-masked
; (imm 2, passthrough %x3), zero-masked (imm 3, {z}), and unmasked (imm 4);
; the three results are summed so none can be dead-code eliminated.
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
; X86-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
; X64-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3)
  %5 = bitcast i32 %x4 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}
1272
1273declare  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
1274
define  <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; Two psad.bw.512 calls with a shared first operand, summed: expects two
; vpsadbw instructions plus a vpaddq. NOTE(review): despite the "mask" in the
; test name, no mask operand is involved here.
; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9]
; CHECK-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2]
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %res1 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %res2 = add  <8 x i64> %res, %res1
  ret  <8 x i64> %res2
}
1287
1288declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone
1289
define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
; All-constant operands under optsize: checks the shift is NOT constant-folded
; away and a vpsrlvw with a constant-pool memory operand is emitted (the
; X86/X64 blocks differ only in constant-pool addressing/fixup kind).
; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,  i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
  ret <32 x i16> %res1
}
1311
define <32 x i16>@test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1) {
; Unmasked variable logical right shift of words: a single vpsrlvw.
; CHECK-LABEL: test_int_x86_avx512_psrlv32hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}
1320
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; Merge-masked vpsrlvw: mask bitcast + select folds into {%k1} with %x2 as
; the passthrough register.
; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
1340
define <32 x i16>@test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; Zero-masked vpsrlvw: select against zeroinitializer folds into {%k1} {z}.
; X86-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
1358
1359declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>)
1360
define <32 x i16>@test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1) {
; Unmasked variable arithmetic right shift of words: a single vpsravw.
; CHECK-LABEL: test_int_x86_avx512_psrav32_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}
1369
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; Merge-masked vpsravw: mask bitcast + select folds into {%k1} with %x2 as
; the passthrough register.
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
1389
define <32 x i16>@test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; Zero-masked vpsravw: select against zeroinitializer folds into {%k1} {z}.
; X86-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
1407
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; All-constant vpsravw: checks the shift survives as an instruction with a
; constant-pool memory operand rather than being folded. NOTE(review): the
; %x0..%x3 parameters and the "mask" in the name are unused by the body.
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>)
  ret <32 x i16> %1
}
1429
define <32 x i16>@test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1) {
; Unmasked variable left shift of words: a single vpsllvw.
; CHECK-LABEL: test_int_x86_avx512_psllv32hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}
1438
define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; Merge-masked vpsllvw: mask bitcast + select folds into {%k1} with %x2 as
; the passthrough register.
; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
1458
define <32 x i16>@test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; Zero-masked vpsllvw: select against zeroinitializer folds into {%k1} {z}.
; X86-LABEL: test_int_x86_avx512_maskz_psllv32hi:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv32hi:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
1476
1477declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
1478
define <32 x i16>@test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1) {
; Unmasked word permute: a single vpermw (note operand order: indices %x1
; become the vpermw source-selector register zmm1).
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  ret <32 x i16> %1
}
1487
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; Merge-masked vpermw: mask bitcast + select folds into {%k1} with %x2 as
; the passthrough register.
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  ret <32 x i16> %3
}
1507
define <32 x i16>@test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
; Zero-masked vpermw: select against zeroinitializer folds into {%k1} {z}.
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
1525
define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; Unmasked word shift-left by an XMM count operand: a single vpsllw.
; CHECK-LABEL: test_x86_avx512_psll_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; Merge-masked vpsllw (XMM count): mask bitcast + select folds into {%k1}
; with %passthru as the passthrough register.
; X86-LABEL: test_x86_avx512_mask_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; Zero-masked vpsllw (XMM count): select against zero folds into {%k1} {z}.
; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
1570declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone
1571
1572
define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
; All-constant operands under optsize: checks the shift is NOT constant-folded
; away and a vpsllvw with a constant-pool memory operand is emitted (X86 vs
; X64 differ only in constant-pool addressing/fixup kind).
; X86-LABEL: test_x86_avx512_psllv_w_512_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psllv_w_512_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4,  i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,  i16 1, i16 1, i16 -1>)
  ret <32 x i16> %res1
}
1594declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) nounwind readnone
1595
define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
; Unmasked immediate word shift-left: vpsllw with imm8 = 7.
; CHECK-LABEL: test_x86_avx512_pslli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; Merge-masked immediate vpsllw: mask bitcast + select folds into {%k1}
; with %passthru (zmm1) as the passthrough register.
; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
; Zero-masked immediate vpsllw: select against zero folds into {%k1} {z}.
; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
1640declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone
1641
1642
define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; Unmasked arithmetic right shift by an XMM count operand: a single vpsraw.
; CHECK-LABEL: test_x86_avx512_psra_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; Merge-masked vpsraw (XMM count): mask bitcast + select folds into {%k1}
; with %passthru as the passthrough register.
; X86-LABEL: test_x86_avx512_mask_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; Zero-masked vpsraw (XMM count): select against zero folds into {%k1} {z}.
; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
1687declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone
1688
1689
define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
; Unmasked immediate arithmetic right shift: vpsraw with imm8 = 7.
; CHECK-LABEL: test_x86_avx512_psrai_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; Merge-masked immediate vpsraw: mask bitcast + select folds into {%k1}
; with %passthru (zmm1) as the passthrough register.
; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; Zero-masked immediate vpsraw: select against zero folds into {%k1} {z}.
; NOTE(review): %passthru is unused in this zeroing variant.
; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
1734declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone
1735
1736
define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; Unmasked logical right shift by an XMM count operand: a single vpsrlw.
; CHECK-LABEL: test_x86_avx512_psrl_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked variable-count logical right shift: the select with %passthru
; as fallback should fold into vpsrlw {k1} writing into the register holding
; %passthru (zmm2), followed by a move back into the return register.
1745define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1746; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
1747; X86:       # %bb.0:
1748; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1749; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1750; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1751; X86-NEXT:    retl # encoding: [0xc3]
1752;
1753; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
1754; X64:       # %bb.0:
1755; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1756; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1757; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1758; X64-NEXT:    retq # encoding: [0xc3]
1759  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1760  %mask.cast = bitcast i32 %mask to <32 x i1>
1761  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1762  ret <32 x i16> %res2
1763}
; Zeroing-masked variable-count logical right shift: the select with a
; zeroinitializer fallback should fold into vpsrlw with {k1} {z} masking.
1764define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1765; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
1766; X86:       # %bb.0:
1767; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1768; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1769; X86-NEXT:    retl # encoding: [0xc3]
1770;
1771; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
1772; X64:       # %bb.0:
1773; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1774; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1775; X64-NEXT:    retq # encoding: [0xc3]
1776  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1777  %mask.cast = bitcast i32 %mask to <32 x i1>
1778  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1779  ret <32 x i16> %res2
1780}
1781declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone
1782
; Shift count loaded from memory: the load of %a1 should fold into vpsrlw's
; memory operand rather than being emitted as a separate vector load. The
; -disable-peephole RUN flag (see file header) keeps this folding honest.
1783define <32 x i16> @test_x86_avx512_psrl_w_512_load(<32 x i16> %a0, <8 x i16>* %p) {
1784; X86-LABEL: test_x86_avx512_psrl_w_512_load:
1785; X86:       # %bb.0:
1786; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1787; X86-NEXT:    vpsrlw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x00]
1788; X86-NEXT:    retl # encoding: [0xc3]
1789;
1790; X64-LABEL: test_x86_avx512_psrl_w_512_load:
1791; X64:       # %bb.0:
1792; X64-NEXT:    vpsrlw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x07]
1793; X64-NEXT:    retq # encoding: [0xc3]
1794  %a1 = load <8 x i16>, <8 x i16>* %p
1795  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1796  ret <32 x i16> %res
1797}
1798
; Unmasked immediate logical right shift of words: the bare intrinsic call
; should lower to a single vpsrlw with an immediate count of 7 and no mask.
1799define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
1800; CHECK-LABEL: test_x86_avx512_psrli_w_512:
1801; CHECK:       # %bb.0:
1802; CHECK-NEXT:    vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
1803; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1804  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1805  ret <32 x i16> %res
1806}
; Merge-masked immediate logical right shift: the select with %passthru as
; fallback should fold into vpsrlw {k1} writing into the register holding
; %passthru (zmm1), followed by a move back into the return register.
1807define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1808; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
1809; X86:       # %bb.0:
1810; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1811; X86-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
1812; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1813; X86-NEXT:    retl # encoding: [0xc3]
1814;
1815; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
1816; X64:       # %bb.0:
1817; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1818; X64-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
1819; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1820; X64-NEXT:    retq # encoding: [0xc3]
1821  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1822  %mask.cast = bitcast i32 %mask to <32 x i1>
1823  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1824  ret <32 x i16> %res2
1825}
; Zeroing-masked immediate logical right shift: the select with a
; zeroinitializer fallback should fold into vpsrlw with {k1} {z} masking.
1826define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
1827; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
1828; X86:       # %bb.0:
1829; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1830; X86-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
1831; X86-NEXT:    retl # encoding: [0xc3]
1832;
1833; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
1834; X64:       # %bb.0:
1835; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1836; X64-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
1837; X64-NEXT:    retq # encoding: [0xc3]
1838  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1839  %mask.cast = bitcast i32 %mask to <32 x i1>
1840  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1841  ret <32 x i16> %res2
1842}
1843declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone
1844