1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
5declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
6
7define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
8; X86-LABEL: test_int_x86_avx512_kunpck_wd:
9; X86:       # %bb.0:
10; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
11; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
12; X86-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
13; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
14; X86-NEXT:    retl # encoding: [0xc3]
15;
16; X64-LABEL: test_int_x86_avx512_kunpck_wd:
17; X64:       # %bb.0:
18; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
19; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
20; X64-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
21; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
22; X64-NEXT:    retq # encoding: [0xc3]
23  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
24  ret i32 %res
25}
26
27declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
28
29define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
30; X86-LABEL: test_int_x86_avx512_kunpck_qd:
31; X86:       # %bb.0:
32; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
33; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
34; X86-NEXT:    retl # encoding: [0xc3]
35;
36; X64-LABEL: test_int_x86_avx512_kunpck_qd:
37; X64:       # %bb.0:
38; X64-NEXT:    kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
39; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
40; X64-NEXT:    kunpckdq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4b,0xc1]
41; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
42; X64-NEXT:    retq # encoding: [0xc3]
43  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
44  ret i64 %res
45}
46
47declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
48
49  define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
50; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
51; X86:       # %bb.0:
52; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
53; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
54; X86-NEXT:    vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1]
55; X86-NEXT:    vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1]
56; X86-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
57; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
58; X86-NEXT:    retl # encoding: [0xc3]
59;
60; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
61; X64:       # %bb.0:
62; X64-NEXT:    vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf]
63; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
64; X64-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7]
65; X64-NEXT:    vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
66; X64-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
67; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
68; X64-NEXT:    retq # encoding: [0xc3]
69    %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
70    %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
71    %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
72    %res3 = add <64 x i8> %res, %res1
73    %res4 = add <64 x i8> %res2, %res3
74    ret <64 x i8> %res4
75  }
76
77declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
78  define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
79; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
80; X86:       # %bb.0:
81; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x4c,0x24,0x02]
82; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
83; X86-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc1]
84; X86-NEXT:    vmovdqu16 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd1]
85; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
86; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
87; X86-NEXT:    retl # encoding: [0xc3]
88;
89; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
90; X64:       # %bb.0:
91; X64-NEXT:    vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf]
92; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
93; X64-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7]
94; X64-NEXT:    vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
95; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
96; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
97; X64-NEXT:    retq # encoding: [0xc3]
98    %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
99    %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
100   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
101    %res3 = add <32 x i16> %res, %res1
102   %res4 = add <32 x i16> %res2, %res3
103    ret <32 x i16> %res4
104 }
105
106declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
107
108define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
109; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
110; X86:       # %bb.0:
111; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
112; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
113; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
114; X86-NEXT:    vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
115; X86-NEXT:    vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
116; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
117; X86-NEXT:    retl # encoding: [0xc3]
118;
119; X64-LABEL: test_int_x86_avx512_mask_storeu_b_512:
120; X64:       # %bb.0:
121; X64-NEXT:    kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
122; X64-NEXT:    vmovdqu8 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x07]
123; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
124; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
125; X64-NEXT:    retq # encoding: [0xc3]
126  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
127  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
128  ret void
129}
130
131declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
132
133define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
134; X86-LABEL: test_int_x86_avx512_mask_storeu_w_512:
135; X86:       # %bb.0:
136; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
137; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
138; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
139; X86-NEXT:    vmovdqu16 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x01]
140; X86-NEXT:    vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
141; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
142; X86-NEXT:    retl # encoding: [0xc3]
143;
144; X64-LABEL: test_int_x86_avx512_mask_storeu_w_512:
145; X64:       # %bb.0:
146; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
147; X64-NEXT:    vmovdqu16 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x07]
148; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
149; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
150; X64-NEXT:    retq # encoding: [0xc3]
151  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
152  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
153  ret void
154}
155
156declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)
157
158define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
159; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512:
160; X86:       # %bb.0:
161; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
162; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
163; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
164; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
165; X86-NEXT:    vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00]
166; X86-NEXT:    vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09]
167; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
168; X86-NEXT:    retl # encoding: [0xc3]
169;
170; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512:
171; X64:       # %bb.0:
172; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
173; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
174; X64-NEXT:    vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06]
175; X64-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f]
176; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
177; X64-NEXT:    retq # encoding: [0xc3]
178  %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1)
179  %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask)
180  %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask)
181  %res2 = add <32 x i16> %res, %res1
182  ret <32 x i16> %res2
183}
184
185declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)
186
187define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
188; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
189; X86:       # %bb.0:
190; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
191; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
192; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
193; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
194; X86-NEXT:    vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00]
195; X86-NEXT:    vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09]
196; X86-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
197; X86-NEXT:    retl # encoding: [0xc3]
198;
199; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512:
200; X64:       # %bb.0:
201; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
202; X64-NEXT:    kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
203; X64-NEXT:    vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x06]
204; X64-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f]
205; X64-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
206; X64-NEXT:    retq # encoding: [0xc3]
207  %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1)
208  %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask)
209  %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask)
210  %res2 = add <64 x i8> %res, %res1
211  ret <64 x i8> %res2
212}
213
214declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
215
216define <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) {
217; CHECK-LABEL: test_int_x86_avx512_psll_dq_512:
218; CHECK:       # %bb.0:
219; CHECK-NEXT:    vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08]
220; CHECK-NEXT:    # zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
221; CHECK-NEXT:    vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04]
222; CHECK-NEXT:    # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
223; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
224; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
225  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
226  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
227  %res2 = add <8 x i64> %res, %res1
228  ret <8 x i64> %res2
229}
230
231define <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) {
232; X86-LABEL: test_int_x86_avx512_psll_load_dq_512:
233; X86:       # %bb.0:
234; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
235; X86-NEXT:    vpslldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x38,0x04]
236; X86-NEXT:    # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
237; X86-NEXT:    retl # encoding: [0xc3]
238;
239; X64-LABEL: test_int_x86_avx512_psll_load_dq_512:
240; X64:       # %bb.0:
241; X64-NEXT:    vpslldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x3f,0x04]
242; X64-NEXT:    # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
243; X64-NEXT:    retq # encoding: [0xc3]
244  %x0 = load <8 x i64>, <8 x i64> *%p0
245  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
246  ret <8 x i64> %res
247}
248
249declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
250
251define <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) {
252; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512:
253; CHECK:       # %bb.0:
254; CHECK-NEXT:    vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08]
255; CHECK-NEXT:    # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
256; CHECK-NEXT:    vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04]
257; CHECK-NEXT:    # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
258; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
259; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
260  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
261  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
262  %res2 = add <8 x i64> %res, %res1
263  ret <8 x i64> %res2
264}
265
266define <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) {
267; X86-LABEL: test_int_x86_avx512_psrl_load_dq_512:
268; X86:       # %bb.0:
269; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
270; X86-NEXT:    vpsrldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x18,0x04]
271; X86-NEXT:    # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
272; X86-NEXT:    retl # encoding: [0xc3]
273;
274; X64-LABEL: test_int_x86_avx512_psrl_load_dq_512:
275; X64:       # %bb.0:
276; X64-NEXT:    vpsrldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x1f,0x04]
277; X64-NEXT:    # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
278; X64-NEXT:    retq # encoding: [0xc3]
279  %x0 = load <8 x i64>, <8 x i64> *%p0
280  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
281  ret <8 x i64> %res
282}
283
284declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
285
286define <64 x i8>@test_int_x86_avx512_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3) {
287; CHECK-LABEL: test_int_x86_avx512_palignr_512:
288; CHECK:       # %bb.0:
289; CHECK-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xc1,0x02]
290; CHECK-NEXT:    # zmm0 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
291; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
292  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
293  ret <64 x i8> %res
294}
295
296define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
297; X86-LABEL: test_int_x86_avx512_mask_palignr_512:
298; X86:       # %bb.0:
299; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
300; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
301; X86-NEXT:    # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
302; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
303; X86-NEXT:    retl # encoding: [0xc3]
304;
305; X64-LABEL: test_int_x86_avx512_mask_palignr_512:
306; X64:       # %bb.0:
307; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
308; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
309; X64-NEXT:    # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
310; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
311; X64-NEXT:    retq # encoding: [0xc3]
312  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
313  ret <64 x i8> %res
314}
315
316define <64 x i8>@test_int_x86_avx512_maskz_palignr_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x4) {
317; X86-LABEL: test_int_x86_avx512_maskz_palignr_512:
318; X86:       # %bb.0:
319; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
320; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
321; X86-NEXT:    # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
322; X86-NEXT:    retl # encoding: [0xc3]
323;
324; X64-LABEL: test_int_x86_avx512_maskz_palignr_512:
325; X64:       # %bb.0:
326; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
327; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
328; X64-NEXT:    # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
329; X64-NEXT:    retq # encoding: [0xc3]
330  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
331  ret <64 x i8> %res
332}
333
334declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)
335
336define <32 x i16>@test_int_x86_avx512_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) {
337; CHECK-LABEL: test_int_x86_avx512_pshufh_w_512:
338; CHECK:       # %bb.0:
339; CHECK-NEXT:    vpshufhw $3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xc0,0x03]
340; CHECK-NEXT:    # zmm0 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
341; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
342  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
343  ret <32 x i16> %res
344}
345
346define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
347; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
348; X86:       # %bb.0:
349; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
350; X86-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
351; X86-NEXT:    # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
352; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
353; X86-NEXT:    retl # encoding: [0xc3]
354;
355; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
356; X64:       # %bb.0:
357; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
358; X64-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
359; X64-NEXT:    # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
360; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
361; X64-NEXT:    retq # encoding: [0xc3]
362  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
363  ret <32 x i16> %res
364}
365
366define <32 x i16>@test_int_x86_avx512_maskz_pshufh_w_512(<32 x i16> %x0, i32 %x3) {
367; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_512:
368; X86:       # %bb.0:
369; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
370; X86-NEXT:    vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
371; X86-NEXT:    # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
372; X86-NEXT:    retl # encoding: [0xc3]
373;
374; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_512:
375; X64:       # %bb.0:
376; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
377; X64-NEXT:    vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
378; X64-NEXT:    # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
379; X64-NEXT:    retq # encoding: [0xc3]
380  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
381  ret <32 x i16> %res
382}
383
384declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)
385
386define <32 x i16>@test_int_x86_avx512_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) {
387; CHECK-LABEL: test_int_x86_avx512_pshufl_w_512:
388; CHECK:       # %bb.0:
389; CHECK-NEXT:    vpshuflw $3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xc0,0x03]
390; CHECK-NEXT:    # zmm0 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
391; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
392  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
393  ret <32 x i16> %res
394}
395
396define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
397; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
398; X86:       # %bb.0:
399; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
400; X86-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
401; X86-NEXT:    # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
402; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
403; X86-NEXT:    retl # encoding: [0xc3]
404;
405; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
406; X64:       # %bb.0:
407; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
408; X64-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
409; X64-NEXT:    # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
410; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
411; X64-NEXT:    retq # encoding: [0xc3]
412  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
413  ret <32 x i16> %res
414}
415
416define <32 x i16>@test_int_x86_avx512_maskz_pshufl_w_512(<32 x i16> %x0, i32 %x3) {
417; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_512:
418; X86:       # %bb.0:
419; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
420; X86-NEXT:    vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
421; X86-NEXT:    # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
422; X86-NEXT:    retl # encoding: [0xc3]
423;
424; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_512:
425; X64:       # %bb.0:
426; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
427; X64-NEXT:    vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
428; X64-NEXT:    # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
429; X64-NEXT:    retq # encoding: [0xc3]
430  %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
431  ret <32 x i16> %res
432}
433
434define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
435; X86-LABEL: test_pcmpeq_b:
436; X86:       # %bb.0:
437; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
438; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
439; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
440; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
441; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
442; X86-NEXT:    retl # encoding: [0xc3]
443;
444; X64-LABEL: test_pcmpeq_b:
445; X64:       # %bb.0:
446; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
447; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
448; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
449; X64-NEXT:    retq # encoding: [0xc3]
450  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
451  ret i64 %res
452}
453
454define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
455; X86-LABEL: test_mask_pcmpeq_b:
456; X86:       # %bb.0:
457; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
458; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
459; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
460; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
461; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
462; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
463; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
464; X86-NEXT:    retl # encoding: [0xc3]
465;
466; X64-LABEL: test_mask_pcmpeq_b:
467; X64:       # %bb.0:
468; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
469; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
470; X64-NEXT:    andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
471; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
472; X64-NEXT:    retq # encoding: [0xc3]
473  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
474  ret i64 %res
475}
476
477declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
478
479define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
480; CHECK-LABEL: test_pcmpeq_w:
481; CHECK:       # %bb.0:
482; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
483; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
484; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
485; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
486  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
487  ret i32 %res
488}
489
490define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
491; X86-LABEL: test_mask_pcmpeq_w:
492; X86:       # %bb.0:
493; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
494; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
495; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
496; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
497; X86-NEXT:    retl # encoding: [0xc3]
498;
499; X64-LABEL: test_mask_pcmpeq_w:
500; X64:       # %bb.0:
501; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
502; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
503; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
504; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
505; X64-NEXT:    retq # encoding: [0xc3]
506  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
507  ret i32 %res
508}
509
510declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
511
512define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
513; X86-LABEL: test_pcmpgt_b:
514; X86:       # %bb.0:
515; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
516; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
517; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
518; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
519; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
520; X86-NEXT:    retl # encoding: [0xc3]
521;
522; X64-LABEL: test_pcmpgt_b:
523; X64:       # %bb.0:
524; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
525; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
526; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
527; X64-NEXT:    retq # encoding: [0xc3]
528  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
529  ret i64 %res
530}
531
532define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
533; X86-LABEL: test_mask_pcmpgt_b:
534; X86:       # %bb.0:
535; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
536; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
537; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
538; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
539; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
540; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
541; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
542; X86-NEXT:    retl # encoding: [0xc3]
543;
544; X64-LABEL: test_mask_pcmpgt_b:
545; X64:       # %bb.0:
546; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
547; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
548; X64-NEXT:    andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
549; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
550; X64-NEXT:    retq # encoding: [0xc3]
551  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
552  ret i64 %res
553}
554
555declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
556
557define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
558; CHECK-LABEL: test_pcmpgt_w:
559; CHECK:       # %bb.0:
560; CHECK-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
561; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
562; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
563; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
564  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
565  ret i32 %res
566}
567
568define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
569; X86-LABEL: test_mask_pcmpgt_w:
570; X86:       # %bb.0:
571; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
572; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
573; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
574; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
575; X86-NEXT:    retl # encoding: [0xc3]
576;
577; X64-LABEL: test_mask_pcmpgt_w:
578; X64:       # %bb.0:
579; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
580; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
581; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
582; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
583; X64-NEXT:    retq # encoding: [0xc3]
584  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
585  ret i32 %res
586}
587
588declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
589
590declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
591
592define <64 x i8>@test_int_x86_avx512_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
593; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_512:
594; CHECK:       # %bb.0:
595; CHECK-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xc1]
596; CHECK-NEXT:    # zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
597; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
598  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
599  ret <64 x i8> %res
600}
601
602define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
603; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
604; X86:       # %bb.0:
605; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
606; X86-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
607; X86-NEXT:    # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
608; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
609; X86-NEXT:    retl # encoding: [0xc3]
610;
611; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
612; X64:       # %bb.0:
613; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
614; X64-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
615; X64-NEXT:    # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
616; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
617; X64-NEXT:    retq # encoding: [0xc3]
618  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
619  ret <64 x i8> %res
620}
621
622declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
623
624define <64 x i8>@test_int_x86_avx512_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
625; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_512:
626; CHECK:       # %bb.0:
627; CHECK-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xc1]
628; CHECK-NEXT:    # zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
629; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
630  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
631  ret <64 x i8> %res
632}
633
634define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
635; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
636; X86:       # %bb.0:
637; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
638; X86-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
639; X86-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
640; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
641; X86-NEXT:    retl # encoding: [0xc3]
642;
643; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
644; X64:       # %bb.0:
645; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
646; X64-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
647; X64-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
648; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
649; X64-NEXT:    retq # encoding: [0xc3]
650  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
651  ret <64 x i8> %res
652}
653
654declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
655
656define <32 x i16>@test_int_x86_avx512_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
657; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_512:
658; CHECK:       # %bb.0:
659; CHECK-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xc1]
660; CHECK-NEXT:    # zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
661; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
662  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
663  ret <32 x i16> %res
664}
665
666define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
667; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
668; X86:       # %bb.0:
669; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
670; X86-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
671; X86-NEXT:    # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
672; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
673; X86-NEXT:    retl # encoding: [0xc3]
674;
675; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
676; X64:       # %bb.0:
677; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
678; X64-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
679; X64-NEXT:    # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
680; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
681; X64-NEXT:    retq # encoding: [0xc3]
682  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
683  ret <32 x i16> %res
684}
685
686declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
687
688define <32 x i16>@test_int_x86_avx512_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
689; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_512:
690; CHECK:       # %bb.0:
691; CHECK-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xc1]
692; CHECK-NEXT:    # zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
693; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
694  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
695  ret <32 x i16> %res
696}
697
698define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
699; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
700; X86:       # %bb.0:
701; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
702; X86-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
703; X86-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
704; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
705; X86-NEXT:    retl # encoding: [0xc3]
706;
707; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
708; X64:       # %bb.0:
709; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
710; X64-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
711; X64-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
712; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
713; X64-NEXT:    retq # encoding: [0xc3]
714  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
715  ret <32 x i16> %res
716}
717
718declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
719
720define <64 x i8>@test_int_x86_avx512_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
721; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_512:
722; CHECK:       # %bb.0:
723; CHECK-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xc1]
724; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
725  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
726  ret <64 x i8> %res
727}
728
729define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
730; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
731; X86:       # %bb.0:
732; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
733; X86-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
734; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
735; X86-NEXT:    retl # encoding: [0xc3]
736;
737; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
738; X64:       # %bb.0:
739; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
740; X64-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
741; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
742; X64-NEXT:    retq # encoding: [0xc3]
743  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
744  ret <64 x i8> %res
745}
746
747declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
748
749define <32 x i16>@test_int_x86_avx512_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
750; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_512:
751; CHECK:       # %bb.0:
752; CHECK-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xc1]
753; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
754  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
755  ret <32 x i16> %res
756}
757
758define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
759; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
760; X86:       # %bb.0:
761; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
762; X86-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
763; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
764; X86-NEXT:    retl # encoding: [0xc3]
765;
766; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
767; X64:       # %bb.0:
768; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
769; X64-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
770; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
771; X64-NEXT:    retq # encoding: [0xc3]
772  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
773  ret <32 x i16> %res
774}
775
776declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
777
778define <64 x i8>@test_int_x86_avx512_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
779; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_512:
780; CHECK:       # %bb.0:
781; CHECK-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xc1]
782; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
783  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
784  ret <64 x i8> %res
785}
786
787define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
788; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
789; X86:       # %bb.0:
790; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
791; X86-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
792; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
793; X86-NEXT:    retl # encoding: [0xc3]
794;
795; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
796; X64:       # %bb.0:
797; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
798; X64-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
799; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
800; X64-NEXT:    retq # encoding: [0xc3]
801  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
802  ret <64 x i8> %res
803}
804
805declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
806
807define <32 x i16>@test_int_x86_avx512_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
808; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_512:
809; CHECK:       # %bb.0:
810; CHECK-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xc1]
811; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
812  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
813  ret <32 x i16> %res
814}
815
816define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
817; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
818; X86:       # %bb.0:
819; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
820; X86-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
821; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
822; X86-NEXT:    retl # encoding: [0xc3]
823;
824; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
825; X64:       # %bb.0:
826; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
827; X64-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
828; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
829; X64-NEXT:    retq # encoding: [0xc3]
830  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
831  ret <32 x i16> %res
832}
833
834declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
835
836define <64 x i8>@test_int_x86_avx512_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
837; CHECK-LABEL: test_int_x86_avx512_pmins_b_512:
838; CHECK:       # %bb.0:
839; CHECK-NEXT:    vpminsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xc1]
840; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
841  %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
842  ret <64 x i8> %res
843}
844
845define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
846; X86-LABEL: test_int_x86_avx512_mask_pmins_b_512:
847; X86:       # %bb.0:
848; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
849; X86-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
850; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
851; X86-NEXT:    retl # encoding: [0xc3]
852;
853; X64-LABEL: test_int_x86_avx512_mask_pmins_b_512:
854; X64:       # %bb.0:
855; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
856; X64-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
857; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
858; X64-NEXT:    retq # encoding: [0xc3]
859  %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
860  ret <64 x i8> %res
861}
862
863declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
864
865define <32 x i16>@test_int_x86_avx512_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
866; CHECK-LABEL: test_int_x86_avx512_pmins_w_512:
867; CHECK:       # %bb.0:
868; CHECK-NEXT:    vpminsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xc1]
869; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
870  %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
871  ret <32 x i16> %res
872}
873
874define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
875; X86-LABEL: test_int_x86_avx512_mask_pmins_w_512:
876; X86:       # %bb.0:
877; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
878; X86-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
879; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
880; X86-NEXT:    retl # encoding: [0xc3]
881;
882; X64-LABEL: test_int_x86_avx512_mask_pmins_w_512:
883; X64:       # %bb.0:
884; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
885; X64-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
886; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
887; X64-NEXT:    retq # encoding: [0xc3]
888  %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
889  ret <32 x i16> %res
890}
891
892declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
893
894define <64 x i8>@test_int_x86_avx512_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
895; CHECK-LABEL: test_int_x86_avx512_pminu_b_512:
896; CHECK:       # %bb.0:
897; CHECK-NEXT:    vpminub %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xc1]
898; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
899  %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
900  ret <64 x i8> %res
901}
902
903define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
904; X86-LABEL: test_int_x86_avx512_mask_pminu_b_512:
905; X86:       # %bb.0:
906; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
907; X86-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
908; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
909; X86-NEXT:    retl # encoding: [0xc3]
910;
911; X64-LABEL: test_int_x86_avx512_mask_pminu_b_512:
912; X64:       # %bb.0:
913; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
914; X64-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
915; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
916; X64-NEXT:    retq # encoding: [0xc3]
917  %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
918  ret <64 x i8> %res
919}
920
921declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
922
923define <32 x i16>@test_int_x86_avx512_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
924; CHECK-LABEL: test_int_x86_avx512_pminu_w_512:
925; CHECK:       # %bb.0:
926; CHECK-NEXT:    vpminuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xc1]
927; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
928  %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
929  ret <32 x i16> %res
930}
931
932define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
933; X86-LABEL: test_int_x86_avx512_mask_pminu_w_512:
934; X86:       # %bb.0:
935; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
936; X86-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
937; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
938; X86-NEXT:    retl # encoding: [0xc3]
939;
940; X64-LABEL: test_int_x86_avx512_mask_pminu_w_512:
941; X64:       # %bb.0:
942; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
943; X64-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
944; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
945; X64-NEXT:    retq # encoding: [0xc3]
946  %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
947  ret <32 x i16> %res
948}
949
950declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
951
952define <32 x i16>@test_int_x86_avx512_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1) {
953; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_512:
954; CHECK:       # %bb.0:
955; CHECK-NEXT:    vpmovzxbw %ymm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xc0]
956; CHECK-NEXT:    # zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
957; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
958  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
959  ret <32 x i16> %res
960}
961
962define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
963; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
964; X86:       # %bb.0:
965; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
966; X86-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
967; X86-NEXT:    # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
968; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
969; X86-NEXT:    retl # encoding: [0xc3]
970;
971; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
972; X64:       # %bb.0:
973; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
974; X64-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
975; X64-NEXT:    # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
976; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
977; X64-NEXT:    retq # encoding: [0xc3]
978  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
979  ret <32 x i16> %res
980}
981
982define <32 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_512(<32 x i8> %x0, i32 %x2) {
983; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_512:
984; X86:       # %bb.0:
985; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
986; X86-NEXT:    vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
987; X86-NEXT:    # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
988; X86-NEXT:    retl # encoding: [0xc3]
989;
990; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_512:
991; X64:       # %bb.0:
992; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
993; X64-NEXT:    vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
994; X64-NEXT:    # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
995; X64-NEXT:    retq # encoding: [0xc3]
996  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
997  ret <32 x i16> %res
998}
999
1000declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
1001
1002define <32 x i16>@test_int_x86_avx512_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1) {
1003; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_512:
1004; CHECK:       # %bb.0:
1005; CHECK-NEXT:    vpmovsxbw %ymm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xc0]
1006; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1007  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
1008  ret <32 x i16> %res
1009}
1010
1011define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
1012; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
1013; X86:       # %bb.0:
1014; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1015; X86-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
1016; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1017; X86-NEXT:    retl # encoding: [0xc3]
1018;
1019; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
1020; X64:       # %bb.0:
1021; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1022; X64-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
1023; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1024; X64-NEXT:    retq # encoding: [0xc3]
1025  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
1026  ret <32 x i16> %res
1027}
1028
1029define <32 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_512(<32 x i8> %x0, i32 %x2) {
1030; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_512:
1031; X86:       # %bb.0:
1032; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1033; X86-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
1034; X86-NEXT:    retl # encoding: [0xc3]
1035;
1036; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_512:
1037; X64:       # %bb.0:
1038; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1039; X64-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
1040; X64-NEXT:    retq # encoding: [0xc3]
1041  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
1042  ret <32 x i16> %res
1043}
1044
1045declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
1046
1047define <32 x i16>@test_int_x86_avx512_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) {
1048; CHECK-LABEL: test_int_x86_avx512_psrl_w_512:
1049; CHECK:       # %bb.0:
1050; CHECK-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
1051; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1052  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
1053  ret <32 x i16> %res
1054}
1055
1056define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1057; X86-LABEL: test_int_x86_avx512_mask_psrl_w_512:
1058; X86:       # %bb.0:
1059; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1060; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1061; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1062; X86-NEXT:    retl # encoding: [0xc3]
1063;
1064; X64-LABEL: test_int_x86_avx512_mask_psrl_w_512:
1065; X64:       # %bb.0:
1066; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1067; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1068; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1069; X64-NEXT:    retq # encoding: [0xc3]
1070  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
1071  ret <32 x i16> %res
1072}
1073
1074define <32 x i16>@test_int_x86_avx512_maskz_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) {
1075; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_512:
1076; X86:       # %bb.0:
1077; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1078; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1079; X86-NEXT:    retl # encoding: [0xc3]
1080;
1081; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_512:
1082; X64:       # %bb.0:
1083; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1084; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1085; X64-NEXT:    retq # encoding: [0xc3]
1086  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1087  ret <32 x i16> %res
1088}
1089
1090declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32)
1091
1092define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
1093; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
1094; X86:       # %bb.0:
1095; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1096; X86-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
1097; X86-NEXT:    vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04]
1098; X86-NEXT:    vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05]
1099; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1100; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1101; X86-NEXT:    retl # encoding: [0xc3]
1102;
1103; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
1104; X64:       # %bb.0:
1105; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1106; X64-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
1107; X64-NEXT:    vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04]
1108; X64-NEXT:    vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05]
1109; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1110; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1111; X64-NEXT:    retq # encoding: [0xc3]
1112  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1113  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1)
1114  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 5, <32 x i16> zeroinitializer, i32 %x3)
1115  %res3 = add <32 x i16> %res, %res1
1116  %res4 = add <32 x i16> %res3, %res2
1117  ret <32 x i16> %res4
1118}
1119
1120declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
1121
1122define <32 x i16>@test_int_x86_avx512_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) {
1123; CHECK-LABEL: test_int_x86_avx512_psra_w_512:
1124; CHECK:       # %bb.0:
1125; CHECK-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
1126; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1127  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
1128  ret <32 x i16> %res
1129}
1130
1131define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1132; X86-LABEL: test_int_x86_avx512_mask_psra_w_512:
1133; X86:       # %bb.0:
1134; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1135; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1136; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1137; X86-NEXT:    retl # encoding: [0xc3]
1138;
1139; X64-LABEL: test_int_x86_avx512_mask_psra_w_512:
1140; X64:       # %bb.0:
1141; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1142; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1143; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1144; X64-NEXT:    retq # encoding: [0xc3]
1145  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
1146  ret <32 x i16> %res
1147}
1148
1149define <32 x i16>@test_int_x86_avx512_maskz_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) {
1150; X86-LABEL: test_int_x86_avx512_maskz_psra_w_512:
1151; X86:       # %bb.0:
1152; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1153; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1154; X86-NEXT:    retl # encoding: [0xc3]
1155;
1156; X64-LABEL: test_int_x86_avx512_maskz_psra_w_512:
1157; X64:       # %bb.0:
1158; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1159; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1160; X64-NEXT:    retq # encoding: [0xc3]
1161  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1162  ret <32 x i16> %res
1163}
1164
1165declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32)
1166
1167define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
1168; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512:
1169; X86:       # %bb.0:
1170; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1171; X86-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
1172; X86-NEXT:    vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04]
1173; X86-NEXT:    vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05]
1174; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1175; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1176; X86-NEXT:    retl # encoding: [0xc3]
1177;
1178; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512:
1179; X64:       # %bb.0:
1180; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1181; X64-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
1182; X64-NEXT:    vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04]
1183; X64-NEXT:    vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05]
1184; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1185; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1186; X64-NEXT:    retq # encoding: [0xc3]
1187  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1188  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3)
1189  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1)
1190  %res3 = add <32 x i16> %res, %res1
1191  %res4 = add <32 x i16> %res3, %res2
1192  ret <32 x i16> %res4
1193}
1194
1195declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
1196
1197define <32 x i16>@test_int_x86_avx512_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) {
1198; CHECK-LABEL: test_int_x86_avx512_psll_w_512:
1199; CHECK:       # %bb.0:
1200; CHECK-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
1201; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1202  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
1203  ret <32 x i16> %res
1204}
1205
1206define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1207; X86-LABEL: test_int_x86_avx512_mask_psll_w_512:
1208; X86:       # %bb.0:
1209; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1210; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1211; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1212; X86-NEXT:    retl # encoding: [0xc3]
1213;
1214; X64-LABEL: test_int_x86_avx512_mask_psll_w_512:
1215; X64:       # %bb.0:
1216; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1217; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1218; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1219; X64-NEXT:    retq # encoding: [0xc3]
1220  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
1221  ret <32 x i16> %res
1222}
1223
1224define <32 x i16>@test_int_x86_avx512_maskz_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) {
1225; X86-LABEL: test_int_x86_avx512_maskz_psll_w_512:
1226; X86:       # %bb.0:
1227; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1228; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1229; X86-NEXT:    retl # encoding: [0xc3]
1230;
1231; X64-LABEL: test_int_x86_avx512_maskz_psll_w_512:
1232; X64:       # %bb.0:
1233; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1234; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1235; X64-NEXT:    retq # encoding: [0xc3]
1236  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1237  ret <32 x i16> %res
1238}
1239
1240declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32)
1241
1242define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
1243; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512:
1244; X86:       # %bb.0:
1245; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1246; X86-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
1247; X86-NEXT:    vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04]
1248; X86-NEXT:    vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05]
1249; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1250; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1251; X86-NEXT:    retl # encoding: [0xc3]
1252;
1253; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512:
1254; X64:       # %bb.0:
1255; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1256; X64-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
1257; X64-NEXT:    vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04]
1258; X64-NEXT:    vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05]
1259; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1260; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1261; X64-NEXT:    retq # encoding: [0xc3]
1262  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1263  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3)
1264  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1)
1265  %res3 = add <32 x i16> %res, %res1
1266  %res4 = add <32 x i16> %res3, %res2
1267  ret <32 x i16> %res4
1268}
1269
1270declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1271
1272define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
1273; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
1274; CHECK:       # %bb.0:
1275; CHECK-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
1276; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1277  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1278  ret <64 x i8> %res
1279}
1280
1281define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1282; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1283; X86:       # %bb.0:
1284; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1285; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
1286; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1287; X86-NEXT:    retl # encoding: [0xc3]
1288;
1289; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1290; X64:       # %bb.0:
1291; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1292; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
1293; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1294; X64-NEXT:    retq # encoding: [0xc3]
1295  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1296  ret <64 x i8> %res
1297}
1298
1299
1300declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
1301
1302define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) {
1303; X86-LABEL: test_int_x86_avx512_cvtmask2b_512:
1304; X86:       # %bb.0:
1305; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04]
1306; X86-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
1307; X86-NEXT:    retl # encoding: [0xc3]
1308;
1309; X64-LABEL: test_int_x86_avx512_cvtmask2b_512:
1310; X64:       # %bb.0:
1311; X64-NEXT:    kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
1312; X64-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
1313; X64-NEXT:    retq # encoding: [0xc3]
1314  %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
1315  ret <64 x i8> %res
1316}
1317
1318declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
1319
1320define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
1321; X86-LABEL: test_int_x86_avx512_cvtmask2w_512:
1322; X86:       # %bb.0:
1323; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
1324; X86-NEXT:    vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1325; X86-NEXT:    retl # encoding: [0xc3]
1326;
1327; X64-LABEL: test_int_x86_avx512_cvtmask2w_512:
1328; X64:       # %bb.0:
1329; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
1330; X64-NEXT:    vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1331; X64-NEXT:    retq # encoding: [0xc3]
1332  %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
1333  ret <32 x i16> %res
1334}
1335define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
1336; CHECK-LABEL: test_mask_packs_epi32_rr_512:
1337; CHECK:       # %bb.0:
1338; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
1339; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1340  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1341  ret <32 x i16> %res
1342}
1343
1344define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
1345; X86-LABEL: test_mask_packs_epi32_rrk_512:
1346; X86:       # %bb.0:
1347; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1348; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
1349; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1350; X86-NEXT:    retl # encoding: [0xc3]
1351;
1352; X64-LABEL: test_mask_packs_epi32_rrk_512:
1353; X64:       # %bb.0:
1354; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1355; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
1356; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1357; X64-NEXT:    retq # encoding: [0xc3]
1358  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1359  ret <32 x i16> %res
1360}
1361
1362define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
1363; X86-LABEL: test_mask_packs_epi32_rrkz_512:
1364; X86:       # %bb.0:
1365; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1366; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
1367; X86-NEXT:    retl # encoding: [0xc3]
1368;
1369; X64-LABEL: test_mask_packs_epi32_rrkz_512:
1370; X64:       # %bb.0:
1371; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1372; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
1373; X64-NEXT:    retq # encoding: [0xc3]
1374  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1375  ret <32 x i16> %res
1376}
1377
1378define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
1379; X86-LABEL: test_mask_packs_epi32_rm_512:
1380; X86:       # %bb.0:
1381; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1382; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
1383; X86-NEXT:    retl # encoding: [0xc3]
1384;
1385; X64-LABEL: test_mask_packs_epi32_rm_512:
1386; X64:       # %bb.0:
1387; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
1388; X64-NEXT:    retq # encoding: [0xc3]
1389  %b = load <16 x i32>, <16 x i32>* %ptr_b
1390  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1391  ret <32 x i16> %res
1392}
1393
1394define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1395; X86-LABEL: test_mask_packs_epi32_rmk_512:
1396; X86:       # %bb.0:
1397; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1398; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1399; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
1400; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1401; X86-NEXT:    retl # encoding: [0xc3]
1402;
1403; X64-LABEL: test_mask_packs_epi32_rmk_512:
1404; X64:       # %bb.0:
1405; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1406; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
1407; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1408; X64-NEXT:    retq # encoding: [0xc3]
1409  %b = load <16 x i32>, <16 x i32>* %ptr_b
1410  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1411  ret <32 x i16> %res
1412}
1413
1414define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1415; X86-LABEL: test_mask_packs_epi32_rmkz_512:
1416; X86:       # %bb.0:
1417; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1418; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1419; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
1420; X86-NEXT:    retl # encoding: [0xc3]
1421;
1422; X64-LABEL: test_mask_packs_epi32_rmkz_512:
1423; X64:       # %bb.0:
1424; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1425; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
1426; X64-NEXT:    retq # encoding: [0xc3]
1427  %b = load <16 x i32>, <16 x i32>* %ptr_b
1428  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1429  ret <32 x i16> %res
1430}
1431
1432define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1433; X86-LABEL: test_mask_packs_epi32_rmb_512:
1434; X86:       # %bb.0:
1435; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1436; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
1437; X86-NEXT:    retl # encoding: [0xc3]
1438;
1439; X64-LABEL: test_mask_packs_epi32_rmb_512:
1440; X64:       # %bb.0:
1441; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
1442; X64-NEXT:    retq # encoding: [0xc3]
1443  %q = load i32, i32* %ptr_b
1444  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1445  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1446  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1447  ret <32 x i16> %res
1448}
1449
1450define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1451; X86-LABEL: test_mask_packs_epi32_rmbk_512:
1452; X86:       # %bb.0:
1453; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1454; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1455; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
1456; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1457; X86-NEXT:    retl # encoding: [0xc3]
1458;
1459; X64-LABEL: test_mask_packs_epi32_rmbk_512:
1460; X64:       # %bb.0:
1461; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1462; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
1463; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1464; X64-NEXT:    retq # encoding: [0xc3]
1465  %q = load i32, i32* %ptr_b
1466  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1467  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1468  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1469  ret <32 x i16> %res
1470}
1471
1472define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1473; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
1474; X86:       # %bb.0:
1475; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1476; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1477; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
1478; X86-NEXT:    retl # encoding: [0xc3]
1479;
1480; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
1481; X64:       # %bb.0:
1482; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1483; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
1484; X64-NEXT:    retq # encoding: [0xc3]
1485  %q = load i32, i32* %ptr_b
1486  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1487  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1488  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1489  ret <32 x i16> %res
1490}
1491
1492declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1493
1494define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1495; CHECK-LABEL: test_mask_packs_epi16_rr_512:
1496; CHECK:       # %bb.0:
1497; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
1498; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1499  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1500  ret <64 x i8> %res
1501}
1502
1503define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1504; X86-LABEL: test_mask_packs_epi16_rrk_512:
1505; X86:       # %bb.0:
1506; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1507; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
1508; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1509; X86-NEXT:    retl # encoding: [0xc3]
1510;
1511; X64-LABEL: test_mask_packs_epi16_rrk_512:
1512; X64:       # %bb.0:
1513; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1514; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
1515; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1516; X64-NEXT:    retq # encoding: [0xc3]
1517  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1518  ret <64 x i8> %res
1519}
1520
1521define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1522; X86-LABEL: test_mask_packs_epi16_rrkz_512:
1523; X86:       # %bb.0:
1524; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1525; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
1526; X86-NEXT:    retl # encoding: [0xc3]
1527;
1528; X64-LABEL: test_mask_packs_epi16_rrkz_512:
1529; X64:       # %bb.0:
1530; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1531; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
1532; X64-NEXT:    retq # encoding: [0xc3]
1533  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1534  ret <64 x i8> %res
1535}
1536
1537define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1538; X86-LABEL: test_mask_packs_epi16_rm_512:
1539; X86:       # %bb.0:
1540; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1541; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
1542; X86-NEXT:    retl # encoding: [0xc3]
1543;
1544; X64-LABEL: test_mask_packs_epi16_rm_512:
1545; X64:       # %bb.0:
1546; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
1547; X64-NEXT:    retq # encoding: [0xc3]
1548  %b = load <32 x i16>, <32 x i16>* %ptr_b
1549  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1550  ret <64 x i8> %res
1551}
1552
1553define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1554; X86-LABEL: test_mask_packs_epi16_rmk_512:
1555; X86:       # %bb.0:
1556; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1557; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1558; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
1559; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1560; X86-NEXT:    retl # encoding: [0xc3]
1561;
1562; X64-LABEL: test_mask_packs_epi16_rmk_512:
1563; X64:       # %bb.0:
1564; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1565; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
1566; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1567; X64-NEXT:    retq # encoding: [0xc3]
1568  %b = load <32 x i16>, <32 x i16>* %ptr_b
1569  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1570  ret <64 x i8> %res
1571}
1572
1573define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1574; X86-LABEL: test_mask_packs_epi16_rmkz_512:
1575; X86:       # %bb.0:
1576; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1577; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1578; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
1579; X86-NEXT:    retl # encoding: [0xc3]
1580;
1581; X64-LABEL: test_mask_packs_epi16_rmkz_512:
1582; X64:       # %bb.0:
1583; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1584; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
1585; X64-NEXT:    retq # encoding: [0xc3]
1586  %b = load <32 x i16>, <32 x i16>* %ptr_b
1587  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1588  ret <64 x i8> %res
1589}
1590
1591declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1592
1593
1594define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
1595; CHECK-LABEL: test_mask_packus_epi32_rr_512:
1596; CHECK:       # %bb.0:
1597; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
1598; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1599  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1600  ret <32 x i16> %res
1601}
1602
1603define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
1604; X86-LABEL: test_mask_packus_epi32_rrk_512:
1605; X86:       # %bb.0:
1606; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1607; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
1608; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1609; X86-NEXT:    retl # encoding: [0xc3]
1610;
1611; X64-LABEL: test_mask_packus_epi32_rrk_512:
1612; X64:       # %bb.0:
1613; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1614; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
1615; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1616; X64-NEXT:    retq # encoding: [0xc3]
1617  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1618  ret <32 x i16> %res
1619}
1620
1621define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
1622; X86-LABEL: test_mask_packus_epi32_rrkz_512:
1623; X86:       # %bb.0:
1624; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1625; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
1626; X86-NEXT:    retl # encoding: [0xc3]
1627;
1628; X64-LABEL: test_mask_packus_epi32_rrkz_512:
1629; X64:       # %bb.0:
1630; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1631; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
1632; X64-NEXT:    retq # encoding: [0xc3]
1633  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1634  ret <32 x i16> %res
1635}
1636
1637define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
1638; X86-LABEL: test_mask_packus_epi32_rm_512:
1639; X86:       # %bb.0:
1640; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1641; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
1642; X86-NEXT:    retl # encoding: [0xc3]
1643;
1644; X64-LABEL: test_mask_packus_epi32_rm_512:
1645; X64:       # %bb.0:
1646; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
1647; X64-NEXT:    retq # encoding: [0xc3]
1648  %b = load <16 x i32>, <16 x i32>* %ptr_b
1649  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1650  ret <32 x i16> %res
1651}
1652
1653define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1654; X86-LABEL: test_mask_packus_epi32_rmk_512:
1655; X86:       # %bb.0:
1656; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1657; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1658; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
1659; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1660; X86-NEXT:    retl # encoding: [0xc3]
1661;
1662; X64-LABEL: test_mask_packus_epi32_rmk_512:
1663; X64:       # %bb.0:
1664; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1665; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
1666; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1667; X64-NEXT:    retq # encoding: [0xc3]
1668  %b = load <16 x i32>, <16 x i32>* %ptr_b
1669  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1670  ret <32 x i16> %res
1671}
1672
1673define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1674; X86-LABEL: test_mask_packus_epi32_rmkz_512:
1675; X86:       # %bb.0:
1676; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1677; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1678; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
1679; X86-NEXT:    retl # encoding: [0xc3]
1680;
1681; X64-LABEL: test_mask_packus_epi32_rmkz_512:
1682; X64:       # %bb.0:
1683; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1684; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
1685; X64-NEXT:    retq # encoding: [0xc3]
1686  %b = load <16 x i32>, <16 x i32>* %ptr_b
1687  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1688  ret <32 x i16> %res
1689}
1690
1691define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1692; X86-LABEL: test_mask_packus_epi32_rmb_512:
1693; X86:       # %bb.0:
1694; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1695; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
1696; X86-NEXT:    retl # encoding: [0xc3]
1697;
1698; X64-LABEL: test_mask_packus_epi32_rmb_512:
1699; X64:       # %bb.0:
1700; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
1701; X64-NEXT:    retq # encoding: [0xc3]
1702  %q = load i32, i32* %ptr_b
1703  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1704  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1705  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1706  ret <32 x i16> %res
1707}
1708
1709define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1710; X86-LABEL: test_mask_packus_epi32_rmbk_512:
1711; X86:       # %bb.0:
1712; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1713; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1714; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
1715; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1716; X86-NEXT:    retl # encoding: [0xc3]
1717;
1718; X64-LABEL: test_mask_packus_epi32_rmbk_512:
1719; X64:       # %bb.0:
1720; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1721; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
1722; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1723; X64-NEXT:    retq # encoding: [0xc3]
1724  %q = load i32, i32* %ptr_b
1725  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1726  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1727  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1728  ret <32 x i16> %res
1729}
1730
1731define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1732; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
1733; X86:       # %bb.0:
1734; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1735; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1736; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
1737; X86-NEXT:    retl # encoding: [0xc3]
1738;
1739; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
1740; X64:       # %bb.0:
1741; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1742; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
1743; X64-NEXT:    retq # encoding: [0xc3]
1744  %q = load i32, i32* %ptr_b
1745  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1746  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1747  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1748  ret <32 x i16> %res
1749}
1750
1751declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1752
1753define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1754; CHECK-LABEL: test_mask_packus_epi16_rr_512:
1755; CHECK:       # %bb.0:
1756; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
1757; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1758  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1759  ret <64 x i8> %res
1760}
1761
1762define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1763; X86-LABEL: test_mask_packus_epi16_rrk_512:
1764; X86:       # %bb.0:
1765; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1766; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
1767; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1768; X86-NEXT:    retl # encoding: [0xc3]
1769;
1770; X64-LABEL: test_mask_packus_epi16_rrk_512:
1771; X64:       # %bb.0:
1772; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1773; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
1774; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1775; X64-NEXT:    retq # encoding: [0xc3]
1776  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1777  ret <64 x i8> %res
1778}
1779
1780define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1781; X86-LABEL: test_mask_packus_epi16_rrkz_512:
1782; X86:       # %bb.0:
1783; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1784; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
1785; X86-NEXT:    retl # encoding: [0xc3]
1786;
1787; X64-LABEL: test_mask_packus_epi16_rrkz_512:
1788; X64:       # %bb.0:
1789; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1790; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
1791; X64-NEXT:    retq # encoding: [0xc3]
1792  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1793  ret <64 x i8> %res
1794}
1795
1796define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1797; X86-LABEL: test_mask_packus_epi16_rm_512:
1798; X86:       # %bb.0:
1799; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1800; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
1801; X86-NEXT:    retl # encoding: [0xc3]
1802;
1803; X64-LABEL: test_mask_packus_epi16_rm_512:
1804; X64:       # %bb.0:
1805; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
1806; X64-NEXT:    retq # encoding: [0xc3]
1807  %b = load <32 x i16>, <32 x i16>* %ptr_b
1808  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1809  ret <64 x i8> %res
1810}
1811
1812define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1813; X86-LABEL: test_mask_packus_epi16_rmk_512:
1814; X86:       # %bb.0:
1815; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1816; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1817; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
1818; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1819; X86-NEXT:    retl # encoding: [0xc3]
1820;
1821; X64-LABEL: test_mask_packus_epi16_rmk_512:
1822; X64:       # %bb.0:
1823; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1824; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
1825; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1826; X64-NEXT:    retq # encoding: [0xc3]
1827  %b = load <32 x i16>, <32 x i16>* %ptr_b
1828  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1829  ret <64 x i8> %res
1830}
1831
1832define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1833; X86-LABEL: test_mask_packus_epi16_rmkz_512:
1834; X86:       # %bb.0:
1835; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1836; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1837; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
1838; X86-NEXT:    retl # encoding: [0xc3]
1839;
1840; X64-LABEL: test_mask_packus_epi16_rmkz_512:
1841; X64:       # %bb.0:
1842; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1843; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
1844; X64-NEXT:    retq # encoding: [0xc3]
1845  %b = load <32 x i16>, <32 x i16>* %ptr_b
1846  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1847  ret <64 x i8> %res
1848}
1849
1850declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1851
1852define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
1853; X86-LABEL: test_cmp_b_512:
1854; X86:       # %bb.0:
1855; X86-NEXT:    pushl %edi # encoding: [0x57]
1856; X86-NEXT:    .cfi_def_cfa_offset 8
1857; X86-NEXT:    pushl %esi # encoding: [0x56]
1858; X86-NEXT:    .cfi_def_cfa_offset 12
1859; X86-NEXT:    .cfi_offset %esi, -12
1860; X86-NEXT:    .cfi_offset %edi, -8
1861; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1862; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1863; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1864; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
1865; X86-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
1866; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1867; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1868; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1869; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
1870; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
1871; X86-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
1872; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1873; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1874; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
1875; X86-NEXT:    addl %esi, %ecx # encoding: [0x01,0xf1]
1876; X86-NEXT:    adcl %edx, %eax # encoding: [0x11,0xd0]
1877; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1878; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1879; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1880; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1881; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
1882; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
1883; X86-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
1884; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1885; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
1886; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
1887; X86-NEXT:    addl %esi, %edi # encoding: [0x01,0xf7]
1888; X86-NEXT:    adcl %edx, %ecx # encoding: [0x11,0xd1]
1889; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
1890; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1891; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1892; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1893; X86-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
1894; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
1895; X86-NEXT:    addl $-1, %eax # encoding: [0x83,0xc0,0xff]
1896; X86-NEXT:    adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
1897; X86-NEXT:    popl %esi # encoding: [0x5e]
1898; X86-NEXT:    .cfi_def_cfa_offset 8
1899; X86-NEXT:    popl %edi # encoding: [0x5f]
1900; X86-NEXT:    .cfi_def_cfa_offset 4
1901; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1902; X86-NEXT:    retl # encoding: [0xc3]
1903;
1904; X64-LABEL: test_cmp_b_512:
1905; X64:       # %bb.0:
1906; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1907; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1908; X64-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
1909; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1910; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1911; X64-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
1912; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1913; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1914; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1915; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1916; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1917; X64-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
1918; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1919; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1920; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
1921; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1922; X64-NEXT:    leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
1923; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1924; X64-NEXT:    retq # encoding: [0xc3]
1925  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
1926  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
1927  %ret1 = add i64 %res0, %res1
1928  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
1929  %ret2 = add i64 %ret1, %res2
1930  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
1931  %ret3 = add i64 %ret2, %res3
1932  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
1933  %ret4 = add i64 %ret3, %res4
1934  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
1935  %ret5 = add i64 %ret4, %res5
1936  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
1937  %ret6 = add i64 %ret5, %res6
1938  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
1939  %ret7 = add i64 %ret6, %res7
1940  ret i64 %ret7
1941}
1942
1943define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
1944; X86-LABEL: test_mask_cmp_b_512:
1945; X86:       # %bb.0:
1946; X86-NEXT:    pushl %ebp # encoding: [0x55]
1947; X86-NEXT:    .cfi_def_cfa_offset 8
1948; X86-NEXT:    pushl %ebx # encoding: [0x53]
1949; X86-NEXT:    .cfi_def_cfa_offset 12
1950; X86-NEXT:    pushl %edi # encoding: [0x57]
1951; X86-NEXT:    .cfi_def_cfa_offset 16
1952; X86-NEXT:    pushl %esi # encoding: [0x56]
1953; X86-NEXT:    .cfi_def_cfa_offset 20
1954; X86-NEXT:    .cfi_offset %esi, -20
1955; X86-NEXT:    .cfi_offset %edi, -16
1956; X86-NEXT:    .cfi_offset %ebx, -12
1957; X86-NEXT:    .cfi_offset %ebp, -8
1958; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
1959; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
1960; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
1961; X86-NEXT:    kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
1962; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1963; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1964; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1965; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
1966; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1967; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
1968; X86-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0]
1969; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1970; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1971; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
1972; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1973; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
1974; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
1975; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
1976; X86-NEXT:    vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02]
1977; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1978; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1979; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
1980; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1981; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
1982; X86-NEXT:    addl %ebx, %edx # encoding: [0x01,0xda]
1983; X86-NEXT:    adcl %edi, %eax # encoding: [0x11,0xf8]
1984; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
1985; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1986; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1987; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
1988; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1989; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
1990; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
1991; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
1992; X86-NEXT:    vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05]
1993; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1994; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1995; X86-NEXT:    kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
1996; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1997; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
1998; X86-NEXT:    addl %ebx, %ecx # encoding: [0x01,0xd9]
1999; X86-NEXT:    adcl %edi, %ebp # encoding: [0x11,0xfd]
2000; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1]
2001; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2002; X86-NEXT:    kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
2003; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2004; X86-NEXT:    kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
2005; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2006; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2007; X86-NEXT:    adcl %ebp, %edx # encoding: [0x11,0xea]
2008; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
2009; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
2010; X86-NEXT:    popl %esi # encoding: [0x5e]
2011; X86-NEXT:    .cfi_def_cfa_offset 16
2012; X86-NEXT:    popl %edi # encoding: [0x5f]
2013; X86-NEXT:    .cfi_def_cfa_offset 12
2014; X86-NEXT:    popl %ebx # encoding: [0x5b]
2015; X86-NEXT:    .cfi_def_cfa_offset 8
2016; X86-NEXT:    popl %ebp # encoding: [0x5d]
2017; X86-NEXT:    .cfi_def_cfa_offset 4
2018; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2019; X86-NEXT:    retl # encoding: [0xc3]
2020;
2021; X64-LABEL: test_mask_cmp_b_512:
2022; X64:       # %bb.0:
2023; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2024; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
2025; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2026; X64-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
2027; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2028; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2029; X64-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
2030; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2031; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2032; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
2033; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2034; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2035; X64-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
2036; X64-NEXT:    kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
2037; X64-NEXT:    addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
2038; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
2039; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2040; X64-NEXT:    addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
2041; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2042; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2043; X64-NEXT:    retq # encoding: [0xc3]
2044  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
2045  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
2046  %ret1 = add i64 %res0, %res1
2047  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
2048  %ret2 = add i64 %ret1, %res2
2049  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
2050  %ret3 = add i64 %ret2, %res3
2051  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
2052  %ret4 = add i64 %ret3, %res4
2053  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
2054  %ret5 = add i64 %ret4, %res5
2055  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
2056  %ret6 = add i64 %ret5, %res6
2057  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
2058  %ret7 = add i64 %ret6, %res7
2059  ret i64 %ret7
2060}
2061
2062declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
2063
2064define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
2065; X86-LABEL: test_ucmp_b_512:
2066; X86:       # %bb.0:
2067; X86-NEXT:    pushl %edi # encoding: [0x57]
2068; X86-NEXT:    .cfi_def_cfa_offset 8
2069; X86-NEXT:    pushl %esi # encoding: [0x56]
2070; X86-NEXT:    .cfi_def_cfa_offset 12
2071; X86-NEXT:    .cfi_offset %esi, -12
2072; X86-NEXT:    .cfi_offset %edi, -8
2073; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
2074; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2075; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
2076; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2077; X86-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
2078; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2079; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2080; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2081; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
2082; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
2083; X86-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
2084; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2085; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
2086; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2087; X86-NEXT:    addl %esi, %ecx # encoding: [0x01,0xf1]
2088; X86-NEXT:    adcl %edx, %eax # encoding: [0x11,0xd0]
2089; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
2090; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2091; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2092; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2093; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
2094; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
2095; X86-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
2096; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2097; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2098; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
2099; X86-NEXT:    addl %esi, %edi # encoding: [0x01,0xf7]
2100; X86-NEXT:    adcl %edx, %ecx # encoding: [0x11,0xd1]
2101; X86-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
2102; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2103; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2104; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2105; X86-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
2106; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
2107; X86-NEXT:    addl $-1, %eax # encoding: [0x83,0xc0,0xff]
2108; X86-NEXT:    adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
2109; X86-NEXT:    popl %esi # encoding: [0x5e]
2110; X86-NEXT:    .cfi_def_cfa_offset 8
2111; X86-NEXT:    popl %edi # encoding: [0x5f]
2112; X86-NEXT:    .cfi_def_cfa_offset 4
2113; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2114; X86-NEXT:    retl # encoding: [0xc3]
2115;
2116; X64-LABEL: test_ucmp_b_512:
2117; X64:       # %bb.0:
2118; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
2119; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2120; X64-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
2121; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2122; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2123; X64-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
2124; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2125; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2126; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
2127; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2128; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2129; X64-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
2130; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2131; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2132; X64-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
2133; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2134; X64-NEXT:    leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
2135; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2136; X64-NEXT:    retq # encoding: [0xc3]
2137  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
2138  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
2139  %ret1 = add i64 %res0, %res1
2140  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
2141  %ret2 = add i64 %ret1, %res2
2142  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
2143  %ret3 = add i64 %ret2, %res3
2144  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
2145  %ret4 = add i64 %ret3, %res4
2146  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
2147  %ret5 = add i64 %ret4, %res5
2148  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
2149  %ret6 = add i64 %ret5, %res6
2150  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
2151  %ret7 = add i64 %ret6, %res7
2152  ret i64 %ret7
2153}
2154
2155define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
2156; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
2157; X86:       # %bb.0:
2158; X86-NEXT:    pushl %ebp # encoding: [0x55]
2159; X86-NEXT:    .cfi_def_cfa_offset 8
2160; X86-NEXT:    pushl %ebx # encoding: [0x53]
2161; X86-NEXT:    .cfi_def_cfa_offset 12
2162; X86-NEXT:    pushl %edi # encoding: [0x57]
2163; X86-NEXT:    .cfi_def_cfa_offset 16
2164; X86-NEXT:    pushl %esi # encoding: [0x56]
2165; X86-NEXT:    .cfi_def_cfa_offset 20
2166; X86-NEXT:    .cfi_offset %esi, -20
2167; X86-NEXT:    .cfi_offset %edi, -16
2168; X86-NEXT:    .cfi_offset %ebx, -12
2169; X86-NEXT:    .cfi_offset %ebp, -8
2170; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
2171; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
2172; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
2173; X86-NEXT:    kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
2174; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2175; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2176; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2177; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
2178; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2179; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
2180; X86-NEXT:    vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01]
2181; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2182; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2183; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
2184; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2185; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
2186; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
2187; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
2188; X86-NEXT:    vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02]
2189; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2190; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2191; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
2192; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2193; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
2194; X86-NEXT:    addl %ebx, %edx # encoding: [0x01,0xda]
2195; X86-NEXT:    adcl %edi, %eax # encoding: [0x11,0xf8]
2196; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
2197; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2198; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2199; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
2200; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2201; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
2202; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
2203; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
2204; X86-NEXT:    vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05]
2205; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2206; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2207; X86-NEXT:    kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
2208; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2209; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
2210; X86-NEXT:    addl %ebx, %ecx # encoding: [0x01,0xd9]
2211; X86-NEXT:    adcl %edi, %ebp # encoding: [0x11,0xfd]
2212; X86-NEXT:    vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06]
2213; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2214; X86-NEXT:    kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
2215; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2216; X86-NEXT:    kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
2217; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2218; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2219; X86-NEXT:    adcl %ebp, %edx # encoding: [0x11,0xea]
2220; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
2221; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
2222; X86-NEXT:    popl %esi # encoding: [0x5e]
2223; X86-NEXT:    .cfi_def_cfa_offset 16
2224; X86-NEXT:    popl %edi # encoding: [0x5f]
2225; X86-NEXT:    .cfi_def_cfa_offset 12
2226; X86-NEXT:    popl %ebx # encoding: [0x5b]
2227; X86-NEXT:    .cfi_def_cfa_offset 8
2228; X86-NEXT:    popl %ebp # encoding: [0x5d]
2229; X86-NEXT:    .cfi_def_cfa_offset 4
2230; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2231; X86-NEXT:    retl # encoding: [0xc3]
2232;
2233; X64-LABEL: test_mask_x86_avx512_ucmp_b_512:
2234; X64:       # %bb.0:
2235; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2236; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
2237; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2238; X64-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
2239; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2240; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2241; X64-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
2242; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2243; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2244; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
2245; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2246; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2247; X64-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
2248; X64-NEXT:    kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
2249; X64-NEXT:    addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
2250; X64-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
2251; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2252; X64-NEXT:    addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
2253; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2254; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2255; X64-NEXT:    retq # encoding: [0xc3]
2256  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
2257  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
2258  %ret1 = add i64 %res0, %res1
2259  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
2260  %ret2 = add i64 %ret1, %res2
2261  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
2262  %ret3 = add i64 %ret2, %res3
2263  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
2264  %ret4 = add i64 %ret3, %res4
2265  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
2266  %ret5 = add i64 %ret4, %res5
2267  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
2268  %ret6 = add i64 %ret5, %res6
2269  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
2270  %ret7 = add i64 %ret6, %res7
2271  ret i64 %ret7
2272}
2273
2274declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
2275
2276define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
2277; X86-LABEL: test_cmp_w_512:
2278; X86:       # %bb.0:
2279; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2280; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2281; X86-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
2282; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2283; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2284; X86-NEXT:    vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
2285; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2286; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2287; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2288; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2289; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2290; X86-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
2291; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2292; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2293; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
2294; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2295; X86-NEXT:    leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2296; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2297; X86-NEXT:    retl # encoding: [0xc3]
2298;
2299; X64-LABEL: test_cmp_w_512:
2300; X64:       # %bb.0:
2301; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2302; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2303; X64-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
2304; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2305; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2306; X64-NEXT:    vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
2307; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2308; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2309; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2310; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2311; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2312; X64-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
2313; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2314; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2315; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
2316; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2317; X64-NEXT:    leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2318; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2319; X64-NEXT:    retq # encoding: [0xc3]
2320  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
2321  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
2322  %ret1 = add i32 %res0, %res1
2323  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
2324  %ret2 = add i32 %ret1, %res2
2325  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
2326  %ret3 = add i32 %ret2, %res3
2327  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
2328  %ret4 = add i32 %ret3, %res4
2329  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
2330  %ret5 = add i32 %ret4, %res5
2331  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
2332  %ret6 = add i32 %ret5, %res6
2333  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
2334  %ret7 = add i32 %ret6, %res7
2335  ret i32 %ret7
2336}
2337
2338define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
2339; X86-LABEL: test_mask_cmp_w_512:
2340; X86:       # %bb.0:
2341; X86-NEXT:    pushl %esi # encoding: [0x56]
2342; X86-NEXT:    .cfi_def_cfa_offset 8
2343; X86-NEXT:    .cfi_offset %esi, -8
2344; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
2345; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2346; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2347; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2348; X86-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
2349; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2350; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
2351; X86-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
2352; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2353; X86-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
2354; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2355; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2356; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
2357; X86-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
2358; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2359; X86-NEXT:    addl %edx, %esi # encoding: [0x01,0xd6]
2360; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
2361; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2362; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
2363; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2364; X86-NEXT:    popl %esi # encoding: [0x5e]
2365; X86-NEXT:    .cfi_def_cfa_offset 4
2366; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2367; X86-NEXT:    retl # encoding: [0xc3]
2368;
2369; X64-LABEL: test_mask_cmp_w_512:
2370; X64:       # %bb.0:
2371; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2372; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2373; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2374; X64-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
2375; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2376; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2377; X64-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
2378; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2379; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2380; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2381; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2382; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2383; X64-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
2384; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2385; X64-NEXT:    addl %ecx, %edx # encoding: [0x01,0xca]
2386; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
2387; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2388; X64-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
2389; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
2390; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2391; X64-NEXT:    retq # encoding: [0xc3]
2392  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
2393  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
2394  %ret1 = add i32 %res0, %res1
2395  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
2396  %ret2 = add i32 %ret1, %res2
2397  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
2398  %ret3 = add i32 %ret2, %res3
2399  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
2400  %ret4 = add i32 %ret3, %res4
2401  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
2402  %ret5 = add i32 %ret4, %res5
2403  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
2404  %ret6 = add i32 %ret5, %res6
2405  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
2406  %ret7 = add i32 %ret6, %res7
2407  ret i32 %ret7
2408}
2409
2410declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
2411
2412define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
2413; X86-LABEL: test_ucmp_w_512:
2414; X86:       # %bb.0:
2415; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2416; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2417; X86-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
2418; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2419; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2420; X86-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
2421; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2422; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2423; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2424; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2425; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2426; X86-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
2427; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2428; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2429; X86-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
2430; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2431; X86-NEXT:    leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2432; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2433; X86-NEXT:    retl # encoding: [0xc3]
2434;
2435; X64-LABEL: test_ucmp_w_512:
2436; X64:       # %bb.0:
2437; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2438; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2439; X64-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
2440; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2441; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2442; X64-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
2443; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2444; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2445; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2446; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2447; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2448; X64-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
2449; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2450; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2451; X64-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
2452; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2453; X64-NEXT:    leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2454; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2455; X64-NEXT:    retq # encoding: [0xc3]
2456  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
2457  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
2458  %ret1 = add i32 %res0, %res1
2459  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
2460  %ret2 = add i32 %ret1, %res2
2461  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
2462  %ret3 = add i32 %ret2, %res3
2463  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
2464  %ret4 = add i32 %ret3, %res4
2465  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
2466  %ret5 = add i32 %ret4, %res5
2467  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
2468  %ret6 = add i32 %ret5, %res6
2469  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
2470  %ret7 = add i32 %ret6, %res7
2471  ret i32 %ret7
2472}
2473
2474define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
2475; X86-LABEL: test_mask_ucmp_w_512:
2476; X86:       # %bb.0:
2477; X86-NEXT:    pushl %esi # encoding: [0x56]
2478; X86-NEXT:    .cfi_def_cfa_offset 8
2479; X86-NEXT:    .cfi_offset %esi, -8
2480; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
2481; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2482; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2483; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2484; X86-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
2485; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2486; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
2487; X86-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
2488; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2489; X86-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
2490; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2491; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2492; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
2493; X86-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
2494; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2495; X86-NEXT:    addl %edx, %esi # encoding: [0x01,0xd6]
2496; X86-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
2497; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2498; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
2499; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2500; X86-NEXT:    popl %esi # encoding: [0x5e]
2501; X86-NEXT:    .cfi_def_cfa_offset 4
2502; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2503; X86-NEXT:    retl # encoding: [0xc3]
2504;
2505; X64-LABEL: test_mask_ucmp_w_512:
2506; X64:       # %bb.0:
2507; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2508; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2509; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2510; X64-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
2511; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2512; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2513; X64-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
2514; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2515; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2516; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2517; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2518; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
2519; X64-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
2520; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2521; X64-NEXT:    addl %ecx, %edx # encoding: [0x01,0xca]
2522; X64-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
2523; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2524; X64-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
2525; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
2526; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2527; X64-NEXT:    retq # encoding: [0xc3]
2528  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
2529  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
2530  %ret1 = add i32 %res0, %res1
2531  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
2532  %ret2 = add i32 %ret1, %res2
2533  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
2534  %ret3 = add i32 %ret2, %res3
2535  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
2536  %ret4 = add i32 %ret3, %res4
2537  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
2538  %ret5 = add i32 %ret4, %res5
2539  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
2540  %ret6 = add i32 %ret5, %res6
2541  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
2542  %ret7 = add i32 %ret6, %res7
2543  ret i32 %ret7
2544}
2545
2546declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
2547
2548
2549declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2550
2551define <64 x i8>@mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2552; CHECK-LABEL: mm512_avg_epu8:
2553; CHECK:       # %bb.0:
2554; CHECK-NEXT:    vpavgb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xc1]
2555; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2556  %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2557  ret <64 x i8> %res
2558}
2559
2560define <64 x i8>@mm512_mask_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2561; X86-LABEL: mm512_mask_avg_epu8:
2562; X86:       # %bb.0:
2563; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
2564; X86-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
2565; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2566; X86-NEXT:    retl # encoding: [0xc3]
2567;
2568; X64-LABEL: mm512_mask_avg_epu8:
2569; X64:       # %bb.0:
2570; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2571; X64-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
2572; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2573; X64-NEXT:    retq # encoding: [0xc3]
2574  %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2575  ret <64 x i8> %res
2576}
2577
2578declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2579
2580define <32 x i16>@mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2581; CHECK-LABEL: mm512_avg_epu16:
2582; CHECK:       # %bb.0:
2583; CHECK-NEXT:    vpavgw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xc1]
2584; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2585  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2586  ret <32 x i16> %res
2587}
2588
2589define <32 x i16>@mm512_mask_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2590; X86-LABEL: mm512_mask_avg_epu16:
2591; X86:       # %bb.0:
2592; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2593; X86-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
2594; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2595; X86-NEXT:    retl # encoding: [0xc3]
2596;
2597; X64-LABEL: mm512_mask_avg_epu16:
2598; X64:       # %bb.0:
2599; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2600; X64-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
2601; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2602; X64-NEXT:    retq # encoding: [0xc3]
2603  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2604  ret <32 x i16> %res
2605}
2606
2607declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
2608
2609define <32 x i16>@test_int_x86_avx512_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1) {
2610; CHECK-LABEL: test_int_x86_avx512_pabs_w_512:
2611; CHECK:       # %bb.0:
2612; CHECK-NEXT:    vpabsw %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xc0]
2613; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2614  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2615  ret <32 x i16> %res
2616}
2617
2618define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2619; X86-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2620; X86:       # %bb.0:
2621; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2622; X86-NEXT:    vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
2623; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2624; X86-NEXT:    retl # encoding: [0xc3]
2625;
2626; X64-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2627; X64:       # %bb.0:
2628; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2629; X64-NEXT:    vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
2630; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2631; X64-NEXT:    retq # encoding: [0xc3]
2632  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2633  ret <32 x i16> %res
2634}
2635
2636declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
2637
2638define <64 x i8>@test_int_x86_avx512_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1) {
2639; CHECK-LABEL: test_int_x86_avx512_pabs_b_512:
2640; CHECK:       # %bb.0:
2641; CHECK-NEXT:    vpabsb %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xc0]
2642; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2643  %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2644  ret <64 x i8> %res
2645}
2646
2647define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2648; X86-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2649; X86:       # %bb.0:
2650; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
2651; X86-NEXT:    vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
2652; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2653; X86-NEXT:    retl # encoding: [0xc3]
2654;
2655; X64-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2656; X64:       # %bb.0:
2657; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2658; X64-NEXT:    vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
2659; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2660; X64-NEXT:    retq # encoding: [0xc3]
2661  %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2662  ret <64 x i8> %res
2663}
2664
2665declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
2666
2667define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2668; X86-LABEL: test_int_x86_avx512_ptestm_b_512:
2669; X86:       # %bb.0:
2670; X86-NEXT:    pushl %esi # encoding: [0x56]
2671; X86-NEXT:    .cfi_def_cfa_offset 8
2672; X86-NEXT:    .cfi_offset %esi, -8
2673; X86-NEXT:    vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
2674; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2675; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2676; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
2677; X86-NEXT:    andl %ecx, %edx # encoding: [0x21,0xca]
2678; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2679; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2680; X86-NEXT:    andl %esi, %eax # encoding: [0x21,0xf0]
2681; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
2682; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
2683; X86-NEXT:    popl %esi # encoding: [0x5e]
2684; X86-NEXT:    .cfi_def_cfa_offset 4
2685; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2686; X86-NEXT:    retl # encoding: [0xc3]
2687;
2688; X64-LABEL: test_int_x86_avx512_ptestm_b_512:
2689; X64:       # %bb.0:
2690; X64-NEXT:    vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
2691; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2692; X64-NEXT:    andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
2693; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2694; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2695; X64-NEXT:    retq # encoding: [0xc3]
2696  %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2697  %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
2698  %res2 = add i64 %res, %res1
2699  ret i64 %res2
2700}
2701
2702declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
2703
2704define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2705; X86-LABEL: test_int_x86_avx512_ptestm_w_512:
2706; X86:       # %bb.0:
2707; X86-NEXT:    vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
2708; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2709; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2710; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
2711; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2712; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2713; X86-NEXT:    retl # encoding: [0xc3]
2714;
2715; X64-LABEL: test_int_x86_avx512_ptestm_w_512:
2716; X64:       # %bb.0:
2717; X64-NEXT:    vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
2718; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2719; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
2720; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
2721; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2722; X64-NEXT:    retq # encoding: [0xc3]
2723  %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2724  %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
2725  %res2 = add i32 %res, %res1
2726  ret i32 %res2
2727}
2728
2729declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2)
2730
2731define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2732; X86-LABEL: test_int_x86_avx512_ptestnm_b_512:
2733; X86:       # %bb.0:
2734; X86-NEXT:    pushl %esi # encoding: [0x56]
2735; X86-NEXT:    .cfi_def_cfa_offset 8
2736; X86-NEXT:    .cfi_offset %esi, -8
2737; X86-NEXT:    vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
2738; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2739; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2740; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
2741; X86-NEXT:    andl %ecx, %edx # encoding: [0x21,0xca]
2742; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2743; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2744; X86-NEXT:    andl %esi, %eax # encoding: [0x21,0xf0]
2745; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
2746; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
2747; X86-NEXT:    popl %esi # encoding: [0x5e]
2748; X86-NEXT:    .cfi_def_cfa_offset 4
2749; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2750; X86-NEXT:    retl # encoding: [0xc3]
2751;
2752; X64-LABEL: test_int_x86_avx512_ptestnm_b_512:
2753; X64:       # %bb.0:
2754; X64-NEXT:    vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
2755; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2756; X64-NEXT:    andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
2757; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2758; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2759; X64-NEXT:    retq # encoding: [0xc3]
2760  %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2761  %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
2762  %res2 = add i64 %res, %res1
2763  ret i64 %res2
2764}
2765
2766declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2)
2767
2768define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2769; X86-LABEL: test_int_x86_avx512_ptestnm_w_512:
2770; X86:       # %bb.0:
2771; X86-NEXT:    vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
2772; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2773; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2774; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
2775; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
2776; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2777; X86-NEXT:    retl # encoding: [0xc3]
2778;
2779; X64-LABEL: test_int_x86_avx512_ptestnm_w_512:
2780; X64:       # %bb.0:
2781; X64-NEXT:    vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
2782; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2783; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
2784; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
2785; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2786; X64-NEXT:    retq # encoding: [0xc3]
2787  %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2788  %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
2789  %res2 = add i32 %res, %res1
2790  ret i32 %res2
2791}
2792
2793declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
2794
2795define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
2796; X86-LABEL: test_int_x86_avx512_cvtb2mask_512:
2797; X86:       # %bb.0:
2798; X86-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
2799; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2800; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2801; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2802; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2803; X86-NEXT:    retl # encoding: [0xc3]
2804;
2805; X64-LABEL: test_int_x86_avx512_cvtb2mask_512:
2806; X64:       # %bb.0:
2807; X64-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
2808; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2809; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2810; X64-NEXT:    retq # encoding: [0xc3]
2811    %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
2812    ret i64 %res
2813}
2814
2815declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
2816
2817define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
2818; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_512:
2819; CHECK:       # %bb.0:
2820; CHECK-NEXT:    vpmovw2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x29,0xc0]
2821; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2822; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
2823; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2824    %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
2825    ret i32 %res
2826}
2827
2828declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2829
2830define <32 x i16>@test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
2831; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_512:
2832; CHECK:       # %bb.0:
2833; CHECK-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xc1]
2834; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2835  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2836  ret <32 x i16> %res
2837}
2838
2839define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2840; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2841; X86:       # %bb.0:
2842; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2843; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
2844; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2845; X86-NEXT:    retl # encoding: [0xc3]
2846;
2847; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2848; X64:       # %bb.0:
2849; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2850; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
2851; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2852; X64-NEXT:    retq # encoding: [0xc3]
2853  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2854  ret <32 x i16> %res
2855}
2856
2857declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2858
2859define <32 x i16>@test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
2860; CHECK-LABEL: test_int_x86_avx512_pmulh_w_512:
2861; CHECK:       # %bb.0:
2862; CHECK-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xc1]
2863; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2864  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2865  ret <32 x i16> %res
2866}
2867
2868define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2869; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2870; X86:       # %bb.0:
2871; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2872; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
2873; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2874; X86-NEXT:    retl # encoding: [0xc3]
2875;
2876; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2877; X64:       # %bb.0:
2878; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2879; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
2880; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2881; X64-NEXT:    retq # encoding: [0xc3]
2882  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2883  ret <32 x i16> %res
2884}
2885
2886declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2887
2888define <32 x i16>@test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
2889; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_512:
2890; CHECK:       # %bb.0:
2891; CHECK-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xc1]
2892; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2893  %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2894  ret <32 x i16> %res
2895}
2896
2897define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2898; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2899; X86:       # %bb.0:
2900; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2901; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
2902; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2903; X86-NEXT:    retl # encoding: [0xc3]
2904;
2905; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2906; X64:       # %bb.0:
2907; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2908; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
2909; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2910; X64-NEXT:    retq # encoding: [0xc3]
2911  %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2912  ret <32 x i16> %res
2913}
2914
2915declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
2916
2917define <32 x i16>@test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2) {
2918; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_512:
2919; CHECK:       # %bb.0:
2920; CHECK-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xc1]
2921; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2922  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
2923  ret <32 x i16> %res
2924}
2925
2926define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
2927; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2928; X86:       # %bb.0:
2929; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2930; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
2931; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2932; X86-NEXT:    retl # encoding: [0xc3]
2933;
2934; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2935; X64:       # %bb.0:
2936; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2937; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
2938; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2939; X64-NEXT:    retq # encoding: [0xc3]
2940  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
2941  ret <32 x i16> %res
2942}
2943
2944declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
2945
2946define <16 x i32>@test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2) {
2947; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_512:
2948; CHECK:       # %bb.0:
2949; CHECK-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xc1]
2950; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2951  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
2952  ret <16 x i32> %res
2953}
2954
2955define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
2956; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2957; X86:       # %bb.0:
2958; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2959; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
2960; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2961; X86-NEXT:    retl # encoding: [0xc3]
2962;
2963; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2964; X64:       # %bb.0:
2965; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2966; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
2967; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2968; X64-NEXT:    retq # encoding: [0xc3]
2969  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
2970  ret <16 x i32> %res
2971}
2972
2973declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2974
2975define <32 x i16>@test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
2976; CHECK-LABEL: test_int_x86_avx512_permvar_hi_512:
2977; CHECK:       # %bb.0:
2978; CHECK-NEXT:    vpermw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xc0]
2979; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2980  %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2981  ret <32 x i16> %res
2982}
2983
2984define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2985; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2986; X86:       # %bb.0:
2987; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2988; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
2989; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2990; X86-NEXT:    retl # encoding: [0xc3]
2991;
2992; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2993; X64:       # %bb.0:
2994; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2995; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
2996; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2997; X64-NEXT:    retq # encoding: [0xc3]
2998  %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2999  ret <32 x i16> %res
3000}
3001
3002define <32 x i16>@test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
3003; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
3004; X86:       # %bb.0:
3005; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3006; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
3007; X86-NEXT:    retl # encoding: [0xc3]
3008;
3009; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
3010; X64:       # %bb.0:
3011; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3012; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
3013; X64-NEXT:    retq # encoding: [0xc3]
3014  %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
3015  ret <32 x i16> %res
3016}
3017
3018declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3019
3020define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
3021; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_512:
3022; CHECK:       # %bb.0:
3023; CHECK-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x75,0xc2]
3024; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3025  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
3026  ret <32 x i16> %res
3027}
3028
3029define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
3030; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
3031; X86:       # %bb.0:
3032; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3033; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
3034; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3035; X86-NEXT:    retl # encoding: [0xc3]
3036;
3037; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
3038; X64:       # %bb.0:
3039; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3040; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
3041; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3042; X64-NEXT:    retq # encoding: [0xc3]
3043  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3044  ret <32 x i16> %res
3045}
3046
3047declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3048
3049define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
3050; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
3051; X86:       # %bb.0:
3052; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3053; X86-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
3054; X86-NEXT:    retl # encoding: [0xc3]
3055;
3056; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
3057; X64:       # %bb.0:
3058; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3059; X64-NEXT:    vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
3060; X64-NEXT:    retq # encoding: [0xc3]
3061  %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3062  ret <32 x i16> %res
3063}
3064
3065declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3066
3067define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
3068; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_512:
3069; CHECK:       # %bb.0:
3070; CHECK-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xc2]
3071; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3072  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
3073  ret <32 x i16> %res
3074}
3075
3076define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
3077; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
3078; X86:       # %bb.0:
3079; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3080; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
3081; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3082; X86-NEXT:    retl # encoding: [0xc3]
3083;
3084; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
3085; X64:       # %bb.0:
3086; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3087; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
3088; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3089; X64-NEXT:    retq # encoding: [0xc3]
3090  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3091  ret <32 x i16> %res
3092}
3093
3094declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
3095
3096define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
3097; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
3098; X86:       # %bb.0:
3099; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3100; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
3101; X86-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
3102; X86-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
3103; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
3104; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3105; X86-NEXT:    retl # encoding: [0xc3]
3106;
3107; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
3108; X64:       # %bb.0:
3109; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3110; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
3111; X64-NEXT:    vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
3112; X64-NEXT:    vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04]
3113; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
3114; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3115; X64-NEXT:    retq # encoding: [0xc3]
3116  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
3117  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3, <32 x i16> zeroinitializer, i32 %x4)
3118  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4, <32 x i16> %x3, i32 -1)
3119  %res3 = add <32 x i16> %res, %res1
3120  %res4 = add <32 x i16> %res3, %res2
3121  ret <32 x i16> %res4
3122}
3123
3124define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3125; CHECK-LABEL: test_mask_adds_epu16_rr_512:
3126; CHECK:       # %bb.0:
3127; CHECK-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
3128; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3129  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3130  ret <32 x i16> %res
3131}
3132
3133define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3134; X86-LABEL: test_mask_adds_epu16_rrk_512:
3135; X86:       # %bb.0:
3136; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3137; X86-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
3138; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3139; X86-NEXT:    retl # encoding: [0xc3]
3140;
3141; X64-LABEL: test_mask_adds_epu16_rrk_512:
3142; X64:       # %bb.0:
3143; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3144; X64-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
3145; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3146; X64-NEXT:    retq # encoding: [0xc3]
3147  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3148  ret <32 x i16> %res
3149}
3150
3151define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3152; X86-LABEL: test_mask_adds_epu16_rrkz_512:
3153; X86:       # %bb.0:
3154; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3155; X86-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
3156; X86-NEXT:    retl # encoding: [0xc3]
3157;
3158; X64-LABEL: test_mask_adds_epu16_rrkz_512:
3159; X64:       # %bb.0:
3160; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3161; X64-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
3162; X64-NEXT:    retq # encoding: [0xc3]
3163  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3164  ret <32 x i16> %res
3165}
3166
3167define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3168; X86-LABEL: test_mask_adds_epu16_rm_512:
3169; X86:       # %bb.0:
3170; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3171; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00]
3172; X86-NEXT:    retl # encoding: [0xc3]
3173;
3174; X64-LABEL: test_mask_adds_epu16_rm_512:
3175; X64:       # %bb.0:
3176; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07]
3177; X64-NEXT:    retq # encoding: [0xc3]
3178  %b = load <32 x i16>, <32 x i16>* %ptr_b
3179  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3180  ret <32 x i16> %res
3181}
3182
3183define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3184; X86-LABEL: test_mask_adds_epu16_rmk_512:
3185; X86:       # %bb.0:
3186; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3187; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3188; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08]
3189; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3190; X86-NEXT:    retl # encoding: [0xc3]
3191;
3192; X64-LABEL: test_mask_adds_epu16_rmk_512:
3193; X64:       # %bb.0:
3194; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3195; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f]
3196; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3197; X64-NEXT:    retq # encoding: [0xc3]
3198  %b = load <32 x i16>, <32 x i16>* %ptr_b
3199  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3200  ret <32 x i16> %res
3201}
3202
3203define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3204; X86-LABEL: test_mask_adds_epu16_rmkz_512:
3205; X86:       # %bb.0:
3206; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3207; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3208; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00]
3209; X86-NEXT:    retl # encoding: [0xc3]
3210;
3211; X64-LABEL: test_mask_adds_epu16_rmkz_512:
3212; X64:       # %bb.0:
3213; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3214; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07]
3215; X64-NEXT:    retq # encoding: [0xc3]
3216  %b = load <32 x i16>, <32 x i16>* %ptr_b
3217  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3218  ret <32 x i16> %res
3219}
3220
3221declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3222
3223define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3224; CHECK-LABEL: test_mask_subs_epu16_rr_512:
3225; CHECK:       # %bb.0:
3226; CHECK-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
3227; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3228  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3229  ret <32 x i16> %res
3230}
3231
3232define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3233; X86-LABEL: test_mask_subs_epu16_rrk_512:
3234; X86:       # %bb.0:
3235; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3236; X86-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
3237; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3238; X86-NEXT:    retl # encoding: [0xc3]
3239;
3240; X64-LABEL: test_mask_subs_epu16_rrk_512:
3241; X64:       # %bb.0:
3242; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3243; X64-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
3244; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3245; X64-NEXT:    retq # encoding: [0xc3]
3246  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3247  ret <32 x i16> %res
3248}
3249
3250define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3251; X86-LABEL: test_mask_subs_epu16_rrkz_512:
3252; X86:       # %bb.0:
3253; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3254; X86-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
3255; X86-NEXT:    retl # encoding: [0xc3]
3256;
3257; X64-LABEL: test_mask_subs_epu16_rrkz_512:
3258; X64:       # %bb.0:
3259; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3260; X64-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
3261; X64-NEXT:    retq # encoding: [0xc3]
3262  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3263  ret <32 x i16> %res
3264}
3265
3266define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3267; X86-LABEL: test_mask_subs_epu16_rm_512:
3268; X86:       # %bb.0:
3269; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3270; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00]
3271; X86-NEXT:    retl # encoding: [0xc3]
3272;
3273; X64-LABEL: test_mask_subs_epu16_rm_512:
3274; X64:       # %bb.0:
3275; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07]
3276; X64-NEXT:    retq # encoding: [0xc3]
3277  %b = load <32 x i16>, <32 x i16>* %ptr_b
3278  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3279  ret <32 x i16> %res
3280}
3281
3282define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3283; X86-LABEL: test_mask_subs_epu16_rmk_512:
3284; X86:       # %bb.0:
3285; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3286; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3287; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08]
3288; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3289; X86-NEXT:    retl # encoding: [0xc3]
3290;
3291; X64-LABEL: test_mask_subs_epu16_rmk_512:
3292; X64:       # %bb.0:
3293; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3294; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f]
3295; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3296; X64-NEXT:    retq # encoding: [0xc3]
3297  %b = load <32 x i16>, <32 x i16>* %ptr_b
3298  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3299  ret <32 x i16> %res
3300}
3301
3302define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3303; X86-LABEL: test_mask_subs_epu16_rmkz_512:
3304; X86:       # %bb.0:
3305; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3306; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3307; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00]
3308; X86-NEXT:    retl # encoding: [0xc3]
3309;
3310; X64-LABEL: test_mask_subs_epu16_rmkz_512:
3311; X64:       # %bb.0:
3312; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3313; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07]
3314; X64-NEXT:    retq # encoding: [0xc3]
3315  %b = load <32 x i16>, <32 x i16>* %ptr_b
3316  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3317  ret <32 x i16> %res
3318}
3319
3320declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3321
3322define <64 x i8> @test_mask_adds_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3323; CHECK-LABEL: test_mask_adds_epu8_rr_512:
3324; CHECK:       # %bb.0:
3325; CHECK-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0xc1]
3326; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3327  %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3328  ret <64 x i8> %res
3329}
3330
3331define <64 x i8> @test_mask_adds_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3332; X86-LABEL: test_mask_adds_epu8_rrk_512:
3333; X86:       # %bb.0:
3334; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3335; X86-NEXT:    vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1]
3336; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3337; X86-NEXT:    retl # encoding: [0xc3]
3338;
3339; X64-LABEL: test_mask_adds_epu8_rrk_512:
3340; X64:       # %bb.0:
3341; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3342; X64-NEXT:    vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1]
3343; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3344; X64-NEXT:    retq # encoding: [0xc3]
3345  %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3346  ret <64 x i8> %res
3347}
3348
3349define <64 x i8> @test_mask_adds_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3350; X86-LABEL: test_mask_adds_epu8_rrkz_512:
3351; X86:       # %bb.0:
3352; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3353; X86-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1]
3354; X86-NEXT:    retl # encoding: [0xc3]
3355;
3356; X64-LABEL: test_mask_adds_epu8_rrkz_512:
3357; X64:       # %bb.0:
3358; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3359; X64-NEXT:    vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1]
3360; X64-NEXT:    retq # encoding: [0xc3]
3361  %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3362  ret <64 x i8> %res
3363}
3364
3365define <64 x i8> @test_mask_adds_epu8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3366; X86-LABEL: test_mask_adds_epu8_rm_512:
3367; X86:       # %bb.0:
3368; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3369; X86-NEXT:    vpaddusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x00]
3370; X86-NEXT:    retl # encoding: [0xc3]
3371;
3372; X64-LABEL: test_mask_adds_epu8_rm_512:
3373; X64:       # %bb.0:
3374; X64-NEXT:    vpaddusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x07]
3375; X64-NEXT:    retq # encoding: [0xc3]
3376  %b = load <64 x i8>, <64 x i8>* %ptr_b
3377  %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3378  ret <64 x i8> %res
3379}
3380
3381define <64 x i8> @test_mask_adds_epu8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3382; X86-LABEL: test_mask_adds_epu8_rmk_512:
3383; X86:       # %bb.0:
3384; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3385; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3386; X86-NEXT:    vpaddusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x08]
3387; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3388; X86-NEXT:    retl # encoding: [0xc3]
3389;
3390; X64-LABEL: test_mask_adds_epu8_rmk_512:
3391; X64:       # %bb.0:
3392; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3393; X64-NEXT:    vpaddusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x0f]
3394; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3395; X64-NEXT:    retq # encoding: [0xc3]
3396  %b = load <64 x i8>, <64 x i8>* %ptr_b
3397  %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3398  ret <64 x i8> %res
3399}
3400
3401define <64 x i8> @test_mask_adds_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
3402; X86-LABEL: test_mask_adds_epu8_rmkz_512:
3403; X86:       # %bb.0:
3404; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3405; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3406; X86-NEXT:    vpaddusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x00]
3407; X86-NEXT:    retl # encoding: [0xc3]
3408;
3409; X64-LABEL: test_mask_adds_epu8_rmkz_512:
3410; X64:       # %bb.0:
3411; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3412; X64-NEXT:    vpaddusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x07]
3413; X64-NEXT:    retq # encoding: [0xc3]
3414  %b = load <64 x i8>, <64 x i8>* %ptr_b
3415  %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3416  ret <64 x i8> %res
3417}
3418
3419declare <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
3420
3421define <64 x i8> @test_mask_subs_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3422; CHECK-LABEL: test_mask_subs_epu8_rr_512:
3423; CHECK:       # %bb.0:
3424; CHECK-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0xc1]
3425; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3426  %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3427  ret <64 x i8> %res
3428}
3429
3430define <64 x i8> @test_mask_subs_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3431; X86-LABEL: test_mask_subs_epu8_rrk_512:
3432; X86:       # %bb.0:
3433; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3434; X86-NEXT:    vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1]
3435; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3436; X86-NEXT:    retl # encoding: [0xc3]
3437;
3438; X64-LABEL: test_mask_subs_epu8_rrk_512:
3439; X64:       # %bb.0:
3440; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3441; X64-NEXT:    vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1]
3442; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3443; X64-NEXT:    retq # encoding: [0xc3]
3444  %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3445  ret <64 x i8> %res
3446}
3447
3448define <64 x i8> @test_mask_subs_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3449; X86-LABEL: test_mask_subs_epu8_rrkz_512:
3450; X86:       # %bb.0:
3451; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3452; X86-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1]
3453; X86-NEXT:    retl # encoding: [0xc3]
3454;
3455; X64-LABEL: test_mask_subs_epu8_rrkz_512:
3456; X64:       # %bb.0:
3457; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3458; X64-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1]
3459; X64-NEXT:    retq # encoding: [0xc3]
3460  %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3461  ret <64 x i8> %res
3462}
3463
3464define <64 x i8> @test_mask_subs_epu8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3465; X86-LABEL: test_mask_subs_epu8_rm_512:
3466; X86:       # %bb.0:
3467; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3468; X86-NEXT:    vpsubusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x00]
3469; X86-NEXT:    retl # encoding: [0xc3]
3470;
3471; X64-LABEL: test_mask_subs_epu8_rm_512:
3472; X64:       # %bb.0:
3473; X64-NEXT:    vpsubusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x07]
3474; X64-NEXT:    retq # encoding: [0xc3]
3475  %b = load <64 x i8>, <64 x i8>* %ptr_b
3476  %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3477  ret <64 x i8> %res
3478}
3479
3480define <64 x i8> @test_mask_subs_epu8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3481; X86-LABEL: test_mask_subs_epu8_rmk_512:
3482; X86:       # %bb.0:
3483; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3484; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3485; X86-NEXT:    vpsubusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x08]
3486; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3487; X86-NEXT:    retl # encoding: [0xc3]
3488;
3489; X64-LABEL: test_mask_subs_epu8_rmk_512:
3490; X64:       # %bb.0:
3491; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3492; X64-NEXT:    vpsubusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x0f]
3493; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3494; X64-NEXT:    retq # encoding: [0xc3]
3495  %b = load <64 x i8>, <64 x i8>* %ptr_b
3496  %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3497  ret <64 x i8> %res
3498}
3499
3500define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
3501; X86-LABEL: test_mask_subs_epu8_rmkz_512:
3502; X86:       # %bb.0:
3503; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3504; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3505; X86-NEXT:    vpsubusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x00]
3506; X86-NEXT:    retl # encoding: [0xc3]
3507;
3508; X64-LABEL: test_mask_subs_epu8_rmkz_512:
3509; X64:       # %bb.0:
3510; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3511; X64-NEXT:    vpsubusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x07]
3512; X64-NEXT:    retq # encoding: [0xc3]
3513  %b = load <64 x i8>, <64 x i8>* %ptr_b
3514  %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3515  ret <64 x i8> %res
3516}
3517
3518declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
3519
3520define <32 x i16> @test_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3521; CHECK-LABEL: test_adds_epi16_rr_512:
3522; CHECK:       # %bb.0:
3523; CHECK-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
3524; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3525  %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3526  ret <32 x i16> %1
3527}
3528
3529define <32 x i16> @test_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3530; X86-LABEL: test_adds_epi16_rrk_512:
3531; X86:       # %bb.0:
3532; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3533; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3534; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3535; X86-NEXT:    retl # encoding: [0xc3]
3536;
3537; X64-LABEL: test_adds_epi16_rrk_512:
3538; X64:       # %bb.0:
3539; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3540; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3541; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3542; X64-NEXT:    retq # encoding: [0xc3]
3543  %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3544  %2 = bitcast i32 %mask to <32 x i1>
3545  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3546  ret <32 x i16> %3
3547}
3548
3549define <32 x i16> @test_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3550; X86-LABEL: test_adds_epi16_rrkz_512:
3551; X86:       # %bb.0:
3552; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3553; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3554; X86-NEXT:    retl # encoding: [0xc3]
3555;
3556; X64-LABEL: test_adds_epi16_rrkz_512:
3557; X64:       # %bb.0:
3558; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3559; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3560; X64-NEXT:    retq # encoding: [0xc3]
3561  %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3562  %2 = bitcast i32 %mask to <32 x i1>
3563  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3564  ret <32 x i16> %3
3565}
3566
3567define <32 x i16> @test_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3568; X86-LABEL: test_adds_epi16_rm_512:
3569; X86:       # %bb.0:
3570; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3571; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
3572; X86-NEXT:    retl # encoding: [0xc3]
3573;
3574; X64-LABEL: test_adds_epi16_rm_512:
3575; X64:       # %bb.0:
3576; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
3577; X64-NEXT:    retq # encoding: [0xc3]
3578  %b = load <32 x i16>, <32 x i16>* %ptr_b
3579  %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3580  ret <32 x i16> %1
3581}
3582
3583define <32 x i16> @test_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3584; X86-LABEL: test_adds_epi16_rmk_512:
3585; X86:       # %bb.0:
3586; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3587; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3588; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
3589; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3590; X86-NEXT:    retl # encoding: [0xc3]
3591;
3592; X64-LABEL: test_adds_epi16_rmk_512:
3593; X64:       # %bb.0:
3594; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3595; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
3596; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3597; X64-NEXT:    retq # encoding: [0xc3]
3598  %b = load <32 x i16>, <32 x i16>* %ptr_b
3599  %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3600  %2 = bitcast i32 %mask to <32 x i1>
3601  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3602  ret <32 x i16> %3
3603}
3604
3605define <32 x i16> @test_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3606; X86-LABEL: test_adds_epi16_rmkz_512:
3607; X86:       # %bb.0:
3608; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3609; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3610; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
3611; X86-NEXT:    retl # encoding: [0xc3]
3612;
3613; X64-LABEL: test_adds_epi16_rmkz_512:
3614; X64:       # %bb.0:
3615; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3616; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
3617; X64-NEXT:    retq # encoding: [0xc3]
3618  %b = load <32 x i16>, <32 x i16>* %ptr_b
3619  %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3620  %2 = bitcast i32 %mask to <32 x i1>
3621  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3622  ret <32 x i16> %3
3623}
3624
3625declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)
3626
3627define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3628; CHECK-LABEL: test_mask_adds_epi16_rr_512:
3629; CHECK:       # %bb.0:
3630; CHECK-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
3631; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3632  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3633  ret <32 x i16> %res
3634}
3635
3636define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3637; X86-LABEL: test_mask_adds_epi16_rrk_512:
3638; X86:       # %bb.0:
3639; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3640; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3641; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3642; X86-NEXT:    retl # encoding: [0xc3]
3643;
3644; X64-LABEL: test_mask_adds_epi16_rrk_512:
3645; X64:       # %bb.0:
3646; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3647; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3648; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3649; X64-NEXT:    retq # encoding: [0xc3]
3650  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3651  ret <32 x i16> %res
3652}
3653
3654define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3655; X86-LABEL: test_mask_adds_epi16_rrkz_512:
3656; X86:       # %bb.0:
3657; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3658; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3659; X86-NEXT:    retl # encoding: [0xc3]
3660;
3661; X64-LABEL: test_mask_adds_epi16_rrkz_512:
3662; X64:       # %bb.0:
3663; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3664; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3665; X64-NEXT:    retq # encoding: [0xc3]
3666  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3667  ret <32 x i16> %res
3668}
3669
3670define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3671; X86-LABEL: test_mask_adds_epi16_rm_512:
3672; X86:       # %bb.0:
3673; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3674; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
3675; X86-NEXT:    retl # encoding: [0xc3]
3676;
3677; X64-LABEL: test_mask_adds_epi16_rm_512:
3678; X64:       # %bb.0:
3679; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
3680; X64-NEXT:    retq # encoding: [0xc3]
3681  %b = load <32 x i16>, <32 x i16>* %ptr_b
3682  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3683  ret <32 x i16> %res
3684}
3685
3686define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3687; X86-LABEL: test_mask_adds_epi16_rmk_512:
3688; X86:       # %bb.0:
3689; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3690; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3691; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
3692; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3693; X86-NEXT:    retl # encoding: [0xc3]
3694;
3695; X64-LABEL: test_mask_adds_epi16_rmk_512:
3696; X64:       # %bb.0:
3697; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3698; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
3699; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3700; X64-NEXT:    retq # encoding: [0xc3]
3701  %b = load <32 x i16>, <32 x i16>* %ptr_b
3702  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3703  ret <32 x i16> %res
3704}
3705
3706define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3707; X86-LABEL: test_mask_adds_epi16_rmkz_512:
3708; X86:       # %bb.0:
3709; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3710; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3711; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
3712; X86-NEXT:    retl # encoding: [0xc3]
3713;
3714; X64-LABEL: test_mask_adds_epi16_rmkz_512:
3715; X64:       # %bb.0:
3716; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3717; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
3718; X64-NEXT:    retq # encoding: [0xc3]
3719  %b = load <32 x i16>, <32 x i16>* %ptr_b
3720  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3721  ret <32 x i16> %res
3722}
3723
3724declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3725
3726define <32 x i16> @test_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3727; CHECK-LABEL: test_subs_epi16_rr_512:
3728; CHECK:       # %bb.0:
3729; CHECK-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
3730; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3731  %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3732  ret <32 x i16> %1
3733}
3734
3735define <32 x i16> @test_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3736; X86-LABEL: test_subs_epi16_rrk_512:
3737; X86:       # %bb.0:
3738; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3739; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3740; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3741; X86-NEXT:    retl # encoding: [0xc3]
3742;
3743; X64-LABEL: test_subs_epi16_rrk_512:
3744; X64:       # %bb.0:
3745; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3746; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3747; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3748; X64-NEXT:    retq # encoding: [0xc3]
3749  %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3750  %2 = bitcast i32 %mask to <32 x i1>
3751  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3752  ret <32 x i16> %3
3753}
3754
3755define <32 x i16> @test_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3756; X86-LABEL: test_subs_epi16_rrkz_512:
3757; X86:       # %bb.0:
3758; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3759; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3760; X86-NEXT:    retl # encoding: [0xc3]
3761;
3762; X64-LABEL: test_subs_epi16_rrkz_512:
3763; X64:       # %bb.0:
3764; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3765; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3766; X64-NEXT:    retq # encoding: [0xc3]
3767  %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3768  %2 = bitcast i32 %mask to <32 x i1>
3769  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3770  ret <32 x i16> %3
3771}
3772
3773define <32 x i16> @test_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3774; X86-LABEL: test_subs_epi16_rm_512:
3775; X86:       # %bb.0:
3776; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3777; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
3778; X86-NEXT:    retl # encoding: [0xc3]
3779;
3780; X64-LABEL: test_subs_epi16_rm_512:
3781; X64:       # %bb.0:
3782; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
3783; X64-NEXT:    retq # encoding: [0xc3]
3784  %b = load <32 x i16>, <32 x i16>* %ptr_b
3785  %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3786  ret <32 x i16> %1
3787}
3788
3789define <32 x i16> @test_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3790; X86-LABEL: test_subs_epi16_rmk_512:
3791; X86:       # %bb.0:
3792; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3793; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3794; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
3795; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3796; X86-NEXT:    retl # encoding: [0xc3]
3797;
3798; X64-LABEL: test_subs_epi16_rmk_512:
3799; X64:       # %bb.0:
3800; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3801; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
3802; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3803; X64-NEXT:    retq # encoding: [0xc3]
3804  %b = load <32 x i16>, <32 x i16>* %ptr_b
3805  %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3806  %2 = bitcast i32 %mask to <32 x i1>
3807  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3808  ret <32 x i16> %3
3809}
3810
3811define <32 x i16> @test_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3812; X86-LABEL: test_subs_epi16_rmkz_512:
3813; X86:       # %bb.0:
3814; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3815; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3816; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
3817; X86-NEXT:    retl # encoding: [0xc3]
3818;
3819; X64-LABEL: test_subs_epi16_rmkz_512:
3820; X64:       # %bb.0:
3821; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3822; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
3823; X64-NEXT:    retq # encoding: [0xc3]
3824  %b = load <32 x i16>, <32 x i16>* %ptr_b
3825  %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3826  %2 = bitcast i32 %mask to <32 x i1>
3827  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3828  ret <32 x i16> %3
3829}
3830
3831declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>)
3832
3833define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3834; CHECK-LABEL: test_mask_subs_epi16_rr_512:
3835; CHECK:       # %bb.0:
3836; CHECK-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
3837; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3838  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3839  ret <32 x i16> %res
3840}
3841
3842define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3843; X86-LABEL: test_mask_subs_epi16_rrk_512:
3844; X86:       # %bb.0:
3845; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3846; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3847; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3848; X86-NEXT:    retl # encoding: [0xc3]
3849;
3850; X64-LABEL: test_mask_subs_epi16_rrk_512:
3851; X64:       # %bb.0:
3852; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3853; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3854; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3855; X64-NEXT:    retq # encoding: [0xc3]
3856  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3857  ret <32 x i16> %res
3858}
3859
3860define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3861; X86-LABEL: test_mask_subs_epi16_rrkz_512:
3862; X86:       # %bb.0:
3863; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3864; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3865; X86-NEXT:    retl # encoding: [0xc3]
3866;
3867; X64-LABEL: test_mask_subs_epi16_rrkz_512:
3868; X64:       # %bb.0:
3869; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3870; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3871; X64-NEXT:    retq # encoding: [0xc3]
3872  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3873  ret <32 x i16> %res
3874}
3875
3876define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3877; X86-LABEL: test_mask_subs_epi16_rm_512:
3878; X86:       # %bb.0:
3879; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3880; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
3881; X86-NEXT:    retl # encoding: [0xc3]
3882;
3883; X64-LABEL: test_mask_subs_epi16_rm_512:
3884; X64:       # %bb.0:
3885; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
3886; X64-NEXT:    retq # encoding: [0xc3]
3887  %b = load <32 x i16>, <32 x i16>* %ptr_b
3888  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3889  ret <32 x i16> %res
3890}
3891
3892define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3893; X86-LABEL: test_mask_subs_epi16_rmk_512:
3894; X86:       # %bb.0:
3895; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3896; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3897; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
3898; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3899; X86-NEXT:    retl # encoding: [0xc3]
3900;
3901; X64-LABEL: test_mask_subs_epi16_rmk_512:
3902; X64:       # %bb.0:
3903; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3904; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
3905; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3906; X64-NEXT:    retq # encoding: [0xc3]
3907  %b = load <32 x i16>, <32 x i16>* %ptr_b
3908  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3909  ret <32 x i16> %res
3910}
3911
3912define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3913; X86-LABEL: test_mask_subs_epi16_rmkz_512:
3914; X86:       # %bb.0:
3915; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3916; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3917; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
3918; X86-NEXT:    retl # encoding: [0xc3]
3919;
3920; X64-LABEL: test_mask_subs_epi16_rmkz_512:
3921; X64:       # %bb.0:
3922; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3923; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
3924; X64-NEXT:    retq # encoding: [0xc3]
3925  %b = load <32 x i16>, <32 x i16>* %ptr_b
3926  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3927  ret <32 x i16> %res
3928}
3929
3930declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3931
3932define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3933; CHECK-LABEL: test_mask_adds_epi8_rr_512:
3934; CHECK:       # %bb.0:
3935; CHECK-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0xc1]
3936; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3937  %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3938  ret <64 x i8> %res
3939}
3940
3941define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3942; X86-LABEL: test_mask_adds_epi8_rrk_512:
3943; X86:       # %bb.0:
3944; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3945; X86-NEXT:    vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
3946; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3947; X86-NEXT:    retl # encoding: [0xc3]
3948;
3949; X64-LABEL: test_mask_adds_epi8_rrk_512:
3950; X64:       # %bb.0:
3951; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3952; X64-NEXT:    vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
3953; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3954; X64-NEXT:    retq # encoding: [0xc3]
3955  %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3956  ret <64 x i8> %res
3957}
3958
3959define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3960; X86-LABEL: test_mask_adds_epi8_rrkz_512:
3961; X86:       # %bb.0:
3962; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3963; X86-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
3964; X86-NEXT:    retl # encoding: [0xc3]
3965;
3966; X64-LABEL: test_mask_adds_epi8_rrkz_512:
3967; X64:       # %bb.0:
3968; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3969; X64-NEXT:    vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
3970; X64-NEXT:    retq # encoding: [0xc3]
3971  %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3972  ret <64 x i8> %res
3973}
3974
3975define <64 x i8> @test_mask_adds_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3976; X86-LABEL: test_mask_adds_epi8_rm_512:
3977; X86:       # %bb.0:
3978; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3979; X86-NEXT:    vpaddsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x00]
3980; X86-NEXT:    retl # encoding: [0xc3]
3981;
3982; X64-LABEL: test_mask_adds_epi8_rm_512:
3983; X64:       # %bb.0:
3984; X64-NEXT:    vpaddsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x07]
3985; X64-NEXT:    retq # encoding: [0xc3]
3986  %b = load <64 x i8>, <64 x i8>* %ptr_b
3987  %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3988  ret <64 x i8> %res
3989}
3990
3991define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3992; X86-LABEL: test_mask_adds_epi8_rmk_512:
3993; X86:       # %bb.0:
3994; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3995; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3996; X86-NEXT:    vpaddsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x08]
3997; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3998; X86-NEXT:    retl # encoding: [0xc3]
3999;
4000; X64-LABEL: test_mask_adds_epi8_rmk_512:
4001; X64:       # %bb.0:
4002; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
4003; X64-NEXT:    vpaddsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x0f]
4004; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4005; X64-NEXT:    retq # encoding: [0xc3]
4006  %b = load <64 x i8>, <64 x i8>* %ptr_b
4007  %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
4008  ret <64 x i8> %res
4009}
4010
4011define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
4012; X86-LABEL: test_mask_adds_epi8_rmkz_512:
4013; X86:       # %bb.0:
4014; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4015; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
4016; X86-NEXT:    vpaddsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x00]
4017; X86-NEXT:    retl # encoding: [0xc3]
4018;
4019; X64-LABEL: test_mask_adds_epi8_rmkz_512:
4020; X64:       # %bb.0:
4021; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
4022; X64-NEXT:    vpaddsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x07]
4023; X64-NEXT:    retq # encoding: [0xc3]
4024  %b = load <64 x i8>, <64 x i8>* %ptr_b
4025  %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
4026  ret <64 x i8> %res
4027}
4028
4029declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
4030
4031define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
4032; CHECK-LABEL: test_mask_subs_epi8_rr_512:
4033; CHECK:       # %bb.0:
4034; CHECK-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0xc1]
4035; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4036  %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
4037  ret <64 x i8> %res
4038}
4039
4040define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
4041; X86-LABEL: test_mask_subs_epi8_rrk_512:
4042; X86:       # %bb.0:
4043; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
4044; X86-NEXT:    vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
4045; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4046; X86-NEXT:    retl # encoding: [0xc3]
4047;
4048; X64-LABEL: test_mask_subs_epi8_rrk_512:
4049; X64:       # %bb.0:
4050; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
4051; X64-NEXT:    vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
4052; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4053; X64-NEXT:    retq # encoding: [0xc3]
4054  %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
4055  ret <64 x i8> %res
4056}
4057
4058define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
4059; X86-LABEL: test_mask_subs_epi8_rrkz_512:
4060; X86:       # %bb.0:
4061; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
4062; X86-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
4063; X86-NEXT:    retl # encoding: [0xc3]
4064;
4065; X64-LABEL: test_mask_subs_epi8_rrkz_512:
4066; X64:       # %bb.0:
4067; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
4068; X64-NEXT:    vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
4069; X64-NEXT:    retq # encoding: [0xc3]
4070  %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
4071  ret <64 x i8> %res
4072}
4073
4074define <64 x i8> @test_mask_subs_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
4075; X86-LABEL: test_mask_subs_epi8_rm_512:
4076; X86:       # %bb.0:
4077; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4078; X86-NEXT:    vpsubsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x00]
4079; X86-NEXT:    retl # encoding: [0xc3]
4080;
4081; X64-LABEL: test_mask_subs_epi8_rm_512:
4082; X64:       # %bb.0:
4083; X64-NEXT:    vpsubsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x07]
4084; X64-NEXT:    retq # encoding: [0xc3]
4085  %b = load <64 x i8>, <64 x i8>* %ptr_b
4086  %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
4087  ret <64 x i8> %res
4088}
4089
4090define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
4091; X86-LABEL: test_mask_subs_epi8_rmk_512:
4092; X86:       # %bb.0:
4093; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4094; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
4095; X86-NEXT:    vpsubsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x08]
4096; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4097; X86-NEXT:    retl # encoding: [0xc3]
4098;
4099; X64-LABEL: test_mask_subs_epi8_rmk_512:
4100; X64:       # %bb.0:
4101; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
4102; X64-NEXT:    vpsubsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x0f]
4103; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4104; X64-NEXT:    retq # encoding: [0xc3]
4105  %b = load <64 x i8>, <64 x i8>* %ptr_b
4106  %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
4107  ret <64 x i8> %res
4108}
4109
4110define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
4111; X86-LABEL: test_mask_subs_epi8_rmkz_512:
4112; X86:       # %bb.0:
4113; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4114; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
4115; X86-NEXT:    vpsubsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x00]
4116; X86-NEXT:    retl # encoding: [0xc3]
4117;
4118; X64-LABEL: test_mask_subs_epi8_rmkz_512:
4119; X64:       # %bb.0:
4120; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
4121; X64-NEXT:    vpsubsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x07]
4122; X64-NEXT:    retq # encoding: [0xc3]
4123  %b = load <64 x i8>, <64 x i8>* %ptr_b
4124  %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
4125  ret <64 x i8> %res
4126}
4127
4128declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
4129
4130declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
4131
4132define <32 x i16>@test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
4133; CHECK-LABEL: test_int_x86_avx512_psrlv32hi:
4134; CHECK:       # %bb.0:
4135; CHECK-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xc1]
4136; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4137  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
4138  ret <32 x i16> %res
4139}
4140
4141define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
4142; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
4143; X86:       # %bb.0:
4144; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4145; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
4146; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4147; X86-NEXT:    retl # encoding: [0xc3]
4148;
4149; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
4150; X64:       # %bb.0:
4151; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4152; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
4153; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4154; X64-NEXT:    retq # encoding: [0xc3]
4155  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
4156  ret <32 x i16> %res
4157}
4158
4159define <32 x i16>@test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
4160; X86-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
4161; X86:       # %bb.0:
4162; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4163; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
4164; X86-NEXT:    retl # encoding: [0xc3]
4165;
4166; X64-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
4167; X64:       # %bb.0:
4168; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4169; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
4170; X64-NEXT:    retq # encoding: [0xc3]
4171  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
4172  ret <32 x i16> %res
4173}
4174
4175declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
4176
4177define <32 x i16>@test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
4178; CHECK-LABEL: test_int_x86_avx512_psrav32_hi:
4179; CHECK:       # %bb.0:
4180; CHECK-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xc1]
4181; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4182  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
4183  ret <32 x i16> %res
4184}
4185
4186define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
4187; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
4188; X86:       # %bb.0:
4189; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4190; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
4191; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4192; X86-NEXT:    retl # encoding: [0xc3]
4193;
4194; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
4195; X64:       # %bb.0:
4196; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4197; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
4198; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4199; X64-NEXT:    retq # encoding: [0xc3]
4200  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
4201  ret <32 x i16> %res
4202}
4203
4204define <32 x i16>@test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
4205; X86-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
4206; X86:       # %bb.0:
4207; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4208; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
4209; X86-NEXT:    retl # encoding: [0xc3]
4210;
4211; X64-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
4212; X64:       # %bb.0:
4213; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4214; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
4215; X64-NEXT:    retq # encoding: [0xc3]
4216  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
4217  ret <32 x i16> %res
4218}
4219
4220declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
4221
4222define <32 x i16>@test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) {
4223; CHECK-LABEL: test_int_x86_avx512_psllv32hi:
4224; CHECK:       # %bb.0:
4225; CHECK-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xc1]
4226; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4227  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
4228  ret <32 x i16> %res
4229}
4230
4231define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
4232; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
4233; X86:       # %bb.0:
4234; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4235; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
4236; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4237; X86-NEXT:    retl # encoding: [0xc3]
4238;
4239; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
4240; X64:       # %bb.0:
4241; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4242; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
4243; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4244; X64-NEXT:    retq # encoding: [0xc3]
4245  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
4246  ret <32 x i16> %res
4247}
4248
4249define <32 x i16>@test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) {
4250; X86-LABEL: test_int_x86_avx512_maskz_psllv32hi:
4251; X86:       # %bb.0:
4252; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4253; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
4254; X86-NEXT:    retl # encoding: [0xc3]
4255;
4256; X64-LABEL: test_int_x86_avx512_maskz_psllv32hi:
4257; X64:       # %bb.0:
4258; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4259; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
4260; X64-NEXT:    retq # encoding: [0xc3]
4261  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
4262  ret <32 x i16> %res
4263}
4264
4265declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
4266
4267define <32 x i8>@test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1) {
4268; CHECK-LABEL: test_int_x86_avx512_pmov_wb_512:
4269; CHECK:       # %bb.0:
4270; CHECK-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
4271; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4272  %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
4273  ret <32 x i8> %res
4274}
4275
4276define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
4277; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
4278; X86:       # %bb.0:
4279; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4280; X86-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
4281; X86-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
4282; X86-NEXT:    retl # encoding: [0xc3]
4283;
4284; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
4285; X64:       # %bb.0:
4286; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4287; X64-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
4288; X64-NEXT:    vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
4289; X64-NEXT:    retq # encoding: [0xc3]
4290  %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
4291  ret <32 x i8> %res
4292}
4293
4294define <32 x i8>@test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) {
4295; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
4296; X86:       # %bb.0:
4297; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4298; X86-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
4299; X86-NEXT:    retl # encoding: [0xc3]
4300;
4301; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
4302; X64:       # %bb.0:
4303; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4304; X64-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
4305; X64-NEXT:    retq # encoding: [0xc3]
4306  %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
4307  ret <32 x i8> %res
4308}
4309