1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
5declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
6
7define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
8; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
9; X86:       # %bb.0:
10; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x4c,0x24,0x04]
11; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
12; X86-NEXT:    vmovdqu8 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0xc1]
13; X86-NEXT:    vmovdqu8 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd1]
14; X86-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
15; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
16; X86-NEXT:    retl # encoding: [0xc3]
17;
18; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
19; X64:       # %bb.0:
20; X64-NEXT:    vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf]
21; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
22; X64-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
23; X64-NEXT:    vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
24; X64-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
25; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
26; X64-NEXT:    retq # encoding: [0xc3]
27  %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
28  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
29  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
30  %res3 = add <16 x i8> %res, %res1
31  %res4 = add <16 x i8> %res2, %res3
32  ret <16 x i8> %res4
33}
34
35
36declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)
37
38define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
39; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
40; X86:       # %bb.0:
41; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x4c,0x24,0x04]
42; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
43; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
44; X86-NEXT:    vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
45; X86-NEXT:    vmovdqu16 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd1]
46; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
47; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
48; X86-NEXT:    retl # encoding: [0xc3]
49;
50; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
51; X64:       # %bb.0:
52; X64-NEXT:    vpbroadcastw %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf]
53; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
54; X64-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
55; X64-NEXT:    vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
56; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
57; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
58; X64-NEXT:    retq # encoding: [0xc3]
59  %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
60  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
61  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
62  %res3 = add <8 x i16> %res, %res1
63  %res4 = add <8 x i16> %res2, %res3
64  ret <8 x i16> %res4
65}
66
67
68declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)
69
70define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
71; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
72; X86:       # %bb.0:
73; X86-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x4c,0x24,0x04]
74; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
75; X86-NEXT:    vmovdqu8 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0xc1]
76; X86-NEXT:    vmovdqu8 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd1]
77; X86-NEXT:    vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
78; X86-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
79; X86-NEXT:    retl # encoding: [0xc3]
80;
81; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
82; X64:       # %bb.0:
83; X64-NEXT:    vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf]
84; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
85; X64-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
86; X64-NEXT:    vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
87; X64-NEXT:    vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
88; X64-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
89; X64-NEXT:    retq # encoding: [0xc3]
90  %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
91  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
92  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
93  %res3 = add <32 x i8> %res, %res1
94  %res4 = add <32 x i8> %res2, %res3
95  ret <32 x i8> %res4
96}
97
98
99
100declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)
101
102define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
103; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
104; X86:       # %bb.0:
105; X86-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x4c,0x24,0x04]
106; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
107; X86-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xc1]
108; X86-NEXT:    vmovdqu16 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd1]
109; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
110; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
111; X86-NEXT:    retl # encoding: [0xc3]
112;
113; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
114; X64:       # %bb.0:
115; X64-NEXT:    vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf]
116; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
117; X64-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
118; X64-NEXT:    vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
119; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
120; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
121; X64-NEXT:    retq # encoding: [0xc3]
122  %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
123  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
124  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
125  %res3 = add <16 x i16> %res, %res1
126  %res4 = add <16 x i16> %res2, %res3
127  ret <16 x i16> %res4
128}
129
130declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
131
132define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1) {
133; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
134; CHECK:       # %bb.0:
135; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xc0]
136; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
137  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
138  ret <32 x i8> %res
139}
140
141define <32 x i8>@test_int_x86_avx512_mask_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
142; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
143; X86:       # %bb.0:
144; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
145; X86-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
146; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
147; X86-NEXT:    retl # encoding: [0xc3]
148;
149; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_256:
150; X64:       # %bb.0:
151; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
152; X64-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
153; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
154; X64-NEXT:    retq # encoding: [0xc3]
155  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
156  ret <32 x i8> %res
157}
158
159define <32 x i8>@test_int_x86_avx512_maskz_pbroadcastb_256(<16 x i8> %x0, i32 %mask) {
160; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
161; X86:       # %bb.0:
162; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
163; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
164; X86-NEXT:    retl # encoding: [0xc3]
165;
166; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_256:
167; X64:       # %bb.0:
168; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
169; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
170; X64-NEXT:    retq # encoding: [0xc3]
171  %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
172  ret <32 x i8> %res
173}
174
175declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)
176
177define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1) {
178; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xc0]
181; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
182  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
183  ret <16 x i8> %res
184}
185
186define <16 x i8>@test_int_x86_avx512_mask_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
187; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
188; X86:       # %bb.0:
189; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
190; X86-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
191; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
192; X86-NEXT:    retl # encoding: [0xc3]
193;
194; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_128:
195; X64:       # %bb.0:
196; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
197; X64-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
198; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
199; X64-NEXT:    retq # encoding: [0xc3]
200  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
201  ret <16 x i8> %res
202}
203
204define <16 x i8>@test_int_x86_avx512_maskz_pbroadcastb_128(<16 x i8> %x0, i16 %mask) {
205; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
206; X86:       # %bb.0:
207; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
208; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
209; X86-NEXT:    retl # encoding: [0xc3]
210;
211; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_128:
212; X64:       # %bb.0:
213; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
214; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
215; X64-NEXT:    retq # encoding: [0xc3]
216  %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
217  ret <16 x i8> %res
218}
219
220declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)
221
222define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1) {
223; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
224; CHECK:       # %bb.0:
225; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xc0]
226; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
227  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
228  ret <16 x i16> %res
229}
230
231define <16 x i16>@test_int_x86_avx512_mask_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
232; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
233; X86:       # %bb.0:
234; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
235; X86-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
236; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
237; X86-NEXT:    retl # encoding: [0xc3]
238;
239; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_256:
240; X64:       # %bb.0:
241; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
242; X64-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
243; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
244; X64-NEXT:    retq # encoding: [0xc3]
245  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
246  ret <16 x i16> %res
247}
248
249define <16 x i16>@test_int_x86_avx512_maskz_pbroadcastw_256(<8 x i16> %x0, i16 %mask) {
250; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
251; X86:       # %bb.0:
252; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
253; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
254; X86-NEXT:    retl # encoding: [0xc3]
255;
256; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_256:
257; X64:       # %bb.0:
258; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
259; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
260; X64-NEXT:    retq # encoding: [0xc3]
261  %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
262  ret <16 x i16> %res
263}
264
265declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)
266
267define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1) {
268; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
269; CHECK:       # %bb.0:
270; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xc0]
271; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
272  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
273  ret <8 x i16> %res
274}
275
276define <8 x i16>@test_int_x86_avx512_mask_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
277; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
278; X86:       # %bb.0:
279; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
280; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
281; X86-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
282; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
283; X86-NEXT:    retl # encoding: [0xc3]
284;
285; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_128:
286; X64:       # %bb.0:
287; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
288; X64-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
289; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
290; X64-NEXT:    retq # encoding: [0xc3]
291  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
292  ret <8 x i16> %res
293}
294
295define <8 x i16>@test_int_x86_avx512_maskz_pbroadcastw_128(<8 x i16> %x0, i8 %mask) {
296; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
297; X86:       # %bb.0:
298; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
299; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
300; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
301; X86-NEXT:    retl # encoding: [0xc3]
302;
303; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_128:
304; X64:       # %bb.0:
305; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
306; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
307; X64-NEXT:    retq # encoding: [0xc3]
308  %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
309  ret <8 x i16> %res
310}
311
312declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)
313
314define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1) {
315; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
316; CHECK:       # %bb.0:
317; CHECK-NEXT:    vpbroadcastb %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
318; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
319  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
320  ret <64 x i8> %res
321}
322
323define <64 x i8>@test_int_x86_avx512_mask_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
324; X86-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
325; X86:       # %bb.0:
326; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
327; X86-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
328; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
329; X86-NEXT:    retl # encoding: [0xc3]
330;
331; X64-LABEL: test_int_x86_avx512_mask_pbroadcastb_512:
332; X64:       # %bb.0:
333; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
334; X64-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
335; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
336; X64-NEXT:    retq # encoding: [0xc3]
337  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
338  ret <64 x i8> %res
339}
340
341define <64 x i8>@test_int_x86_avx512_maskz_pbroadcastb_512(<16 x i8> %x0, i64 %mask) {
342; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
343; X86:       # %bb.0:
344; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
345; X86-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
346; X86-NEXT:    retl # encoding: [0xc3]
347;
348; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastb_512:
349; X64:       # %bb.0:
350; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
351; X64-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
352; X64-NEXT:    retq # encoding: [0xc3]
353  %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
354  ret <64 x i8> %res
355}
356
357declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)
358
359define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1) {
360; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
361; CHECK:       # %bb.0:
362; CHECK-NEXT:    vpbroadcastw %xmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
363; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
364  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
365  ret <32 x i16> %res
366}
367
368define <32 x i16>@test_int_x86_avx512_mask_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
369; X86-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
370; X86:       # %bb.0:
371; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
372; X86-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
373; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
374; X86-NEXT:    retl # encoding: [0xc3]
375;
376; X64-LABEL: test_int_x86_avx512_mask_pbroadcastw_512:
377; X64:       # %bb.0:
378; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
379; X64-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
380; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
381; X64-NEXT:    retq # encoding: [0xc3]
382  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
383  ret <32 x i16> %res
384}
385
386define <32 x i16>@test_int_x86_avx512_maskz_pbroadcastw_512(<8 x i16> %x0, i32 %mask) {
387; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
388; X86:       # %bb.0:
389; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
390; X86-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
391; X86-NEXT:    retl # encoding: [0xc3]
392;
393; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastw_512:
394; X64:       # %bb.0:
395; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
396; X64-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
397; X64-NEXT:    retq # encoding: [0xc3]
398  %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
399  ret <32 x i16> %res
400}
401
402declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16)
403
404define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) {
405; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128:
406; X86:       # %bb.0:
407; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
408; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
409; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
410; X86-NEXT:    vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01]
411; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
412; X86-NEXT:    retl # encoding: [0xc3]
413;
414; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128:
415; X64:       # %bb.0:
416; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
417; X64-NEXT:    vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
418; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
419; X64-NEXT:    retq # encoding: [0xc3]
420  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2)
421  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1)
422  ret void
423}
424
425declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32)
426
427define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) {
428; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256:
429; X86:       # %bb.0:
430; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
431; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
432; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
433; X86-NEXT:    vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01]
434; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
435; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
436; X86-NEXT:    retl # encoding: [0xc3]
437;
438; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256:
439; X64:       # %bb.0:
440; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
441; X64-NEXT:    vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
442; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
443; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
444; X64-NEXT:    retq # encoding: [0xc3]
445  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2)
446  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1)
447  ret void
448}
449
450declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8)
451
452define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) {
453; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128:
454; X86:       # %bb.0:
455; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
456; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
457; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
458; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
459; X86-NEXT:    vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01]
460; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
461; X86-NEXT:    retl # encoding: [0xc3]
462;
463; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128:
464; X64:       # %bb.0:
465; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
466; X64-NEXT:    vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
467; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
468; X64-NEXT:    retq # encoding: [0xc3]
469  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2)
470  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1)
471  ret void
472}
473
474declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16)
475
476define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) {
477; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256:
478; X86:       # %bb.0:
479; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
480; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
481; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
482; X86-NEXT:    vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01]
483; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
484; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
485; X86-NEXT:    retl # encoding: [0xc3]
486;
487; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256:
488; X64:       # %bb.0:
489; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
490; X64-NEXT:    vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
491; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
492; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
493; X64-NEXT:    retq # encoding: [0xc3]
494  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2)
495  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1)
496  ret void
497}
498
499declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8*, <8 x i16>, i8)
500
501define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) {
502; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128:
503; X86:       # %bb.0:
504; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
505; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
506; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
507; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
508; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
509; X86-NEXT:    vmovdqu16 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x00]
510; X86-NEXT:    vmovdqu16 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x09]
511; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
512; X86-NEXT:    retl # encoding: [0xc3]
513;
514; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128:
515; X64:       # %bb.0:
516; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
517; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
518; X64-NEXT:    vmovdqu16 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06]
519; X64-NEXT:    vmovdqu16 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f]
520; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
521; X64-NEXT:    retq # encoding: [0xc3]
522    %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1)
523    %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask)
524    %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask)
525    %res2 = add <8 x i16> %res, %res1
526    ret <8 x i16> %res2
527}
528
529declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8*, <16 x i16>, i16)
530
531define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) {
532; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256:
533; X86:       # %bb.0:
534; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
535; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
536; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
537; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
538; X86-NEXT:    vmovdqu16 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x00]
539; X86-NEXT:    vmovdqu16 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x09]
540; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
541; X86-NEXT:    retl # encoding: [0xc3]
542;
543; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256:
544; X64:       # %bb.0:
545; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
546; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
547; X64-NEXT:    vmovdqu16 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06]
548; X64-NEXT:    vmovdqu16 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f]
549; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
550; X64-NEXT:    retq # encoding: [0xc3]
551    %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1)
552    %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask)
553    %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask)
554    %res2 = add <16 x i16> %res, %res1
555    ret <16 x i16> %res2
556}
557
558declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8*, <16 x i8>, i16)
559
560define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) {
561; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128:
562; X86:       # %bb.0:
563; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
564; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
565; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
566; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
567; X86-NEXT:    vmovdqu8 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x00]
568; X86-NEXT:    vmovdqu8 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x09]
569; X86-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
570; X86-NEXT:    retl # encoding: [0xc3]
571;
572; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128:
573; X64:       # %bb.0:
574; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
575; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
576; X64-NEXT:    vmovdqu8 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06]
577; X64-NEXT:    vmovdqu8 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f]
578; X64-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
579; X64-NEXT:    retq # encoding: [0xc3]
580    %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1)
581    %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask)
582    %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask)
583    %res2 = add <16 x i8> %res, %res1
584    ret <16 x i8> %res2
585}
586
587declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8*, <32 x i8>, i32)
588
589define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) {
590; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256:
591; X86:       # %bb.0:
592; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
593; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
594; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
595; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
596; X86-NEXT:    vmovdqu8 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x00]
597; X86-NEXT:    vmovdqu8 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x09]
598; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1]
599; X86-NEXT:    retl # encoding: [0xc3]
600;
601; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256:
602; X64:       # %bb.0:
603; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
604; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
605; X64-NEXT:    vmovdqu8 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06]
606; X64-NEXT:    vmovdqu8 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f]
607; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1]
608; X64-NEXT:    retq # encoding: [0xc3]
609    %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1)
610    %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask)
611    %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask)
612    %res2 = add <32 x i8> %res, %res1
613    ret <32 x i8> %res2
614}
615
616declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)
617
618define <16 x i8>@test_int_x86_avx512_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3) {
619; CHECK-LABEL: test_int_x86_avx512_palignr_128:
620; CHECK:       # %bb.0:
621; CHECK-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x02]
622; CHECK-NEXT:    # xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
623; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
624  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
625  ret <16 x i8> %res
626}
627
628define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
629; X86-LABEL: test_int_x86_avx512_mask_palignr_128:
630; X86:       # %bb.0:
631; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
632; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
633; X86-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
634; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
635; X86-NEXT:    retl # encoding: [0xc3]
636;
637; X64-LABEL: test_int_x86_avx512_mask_palignr_128:
638; X64:       # %bb.0:
639; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
640; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
641; X64-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
642; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
643; X64-NEXT:    retq # encoding: [0xc3]
644  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
645  ret <16 x i8> %res
646}
647
648define <16 x i8>@test_int_x86_avx512_maskz_palignr_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x4) {
649; X86-LABEL: test_int_x86_avx512_maskz_palignr_128:
650; X86:       # %bb.0:
651; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
652; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
653; X86-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
654; X86-NEXT:    retl # encoding: [0xc3]
655;
656; X64-LABEL: test_int_x86_avx512_maskz_palignr_128:
657; X64:       # %bb.0:
658; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
659; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
660; X64-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
661; X64-NEXT:    retq # encoding: [0xc3]
662  %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
663  ret <16 x i8> %res
664}
665
666declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)
667
668define <32 x i8>@test_int_x86_avx512_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3) {
669; CHECK-LABEL: test_int_x86_avx512_palignr_256:
670; CHECK:       # %bb.0:
671; CHECK-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xc1,0x02]
672; CHECK-NEXT:    # ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
673; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
674  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
675  ret <32 x i8> %res
676}
677
678define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
679; X86-LABEL: test_int_x86_avx512_mask_palignr_256:
680; X86:       # %bb.0:
681; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
682; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
683; X86-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
684; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
685; X86-NEXT:    retl # encoding: [0xc3]
686;
687; X64-LABEL: test_int_x86_avx512_mask_palignr_256:
688; X64:       # %bb.0:
689; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
690; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
691; X64-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
692; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
693; X64-NEXT:    retq # encoding: [0xc3]
694  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
695  ret <32 x i8> %res
696}
697
698define <32 x i8>@test_int_x86_avx512_maskz_palignr_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x4) {
699; X86-LABEL: test_int_x86_avx512_maskz_palignr_256:
700; X86:       # %bb.0:
701; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
702; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
703; X86-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
704; X86-NEXT:    retl # encoding: [0xc3]
705;
706; X64-LABEL: test_int_x86_avx512_maskz_palignr_256:
707; X64:       # %bb.0:
708; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
709; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
710; X64-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
711; X64-NEXT:    retq # encoding: [0xc3]
712  %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
713  ret <32 x i8> %res
714}
715
716declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8)
717
718define <8 x i16>@test_int_x86_avx512_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
719; CHECK-LABEL: test_int_x86_avx512_pshufh_w_128:
720; CHECK:       # %bb.0:
721; CHECK-NEXT:    vpshufhw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x03]
722; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,7,4,4,4]
723; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
724  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
725  ret <8 x i16> %res
726}
727
728define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
729; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
730; X86:       # %bb.0:
731; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
732; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
733; X86-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
734; X86-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
735; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
736; X86-NEXT:    retl # encoding: [0xc3]
737;
738; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
739; X64:       # %bb.0:
740; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
741; X64-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
742; X64-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
743; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
744; X64-NEXT:    retq # encoding: [0xc3]
745  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
746  ret <8 x i16> %res
747}
748
749define <8 x i16>@test_int_x86_avx512_maskz_pshufh_w_128(<8 x i16> %x0, i8 %x3) {
750; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
751; X86:       # %bb.0:
752; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
753; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
754; X86-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
755; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
756; X86-NEXT:    retl # encoding: [0xc3]
757;
758; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_128:
759; X64:       # %bb.0:
760; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
761; X64-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
762; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
763; X64-NEXT:    retq # encoding: [0xc3]
764  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
765  ret <8 x i16> %res
766}
767
768declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16)
769
770define <16 x i16>@test_int_x86_avx512_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2) {
771; CHECK-LABEL: test_int_x86_avx512_pshufh_w_256:
772; CHECK:       # %bb.0:
773; CHECK-NEXT:    vpshufhw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xc0,0x03]
774; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
775; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
776  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
777  ret <16 x i16> %res
778}
779
780define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
781; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
782; X86:       # %bb.0:
783; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
784; X86-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
785; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
786; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
787; X86-NEXT:    retl # encoding: [0xc3]
788;
789; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
790; X64:       # %bb.0:
791; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
792; X64-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
793; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
794; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
795; X64-NEXT:    retq # encoding: [0xc3]
796  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
797  ret <16 x i16> %res
798}
799
800define <16 x i16>@test_int_x86_avx512_maskz_pshufh_w_256(<16 x i16> %x0, i16 %x3) {
801; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
802; X86:       # %bb.0:
803; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
804; X86-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
805; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
806; X86-NEXT:    retl # encoding: [0xc3]
807;
808; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_256:
809; X64:       # %bb.0:
810; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
811; X64-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
812; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
813; X64-NEXT:    retq # encoding: [0xc3]
814  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
815  ret <16 x i16> %res
816}
817
818declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8)
819
820define <8 x i16>@test_int_x86_avx512_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2) {
821; CHECK-LABEL: test_int_x86_avx512_pshufl_w_128:
822; CHECK:       # %bb.0:
823; CHECK-NEXT:    vpshuflw $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x03]
824; CHECK-NEXT:    # xmm0 = xmm0[3,0,0,0,4,5,6,7]
825; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
826  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
827  ret <8 x i16> %res
828}
829
830define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
831; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
832; X86:       # %bb.0:
833; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
834; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
835; X86-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
836; X86-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
837; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
838; X86-NEXT:    retl # encoding: [0xc3]
839;
840; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
841; X64:       # %bb.0:
842; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
843; X64-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
844; X64-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
845; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
846; X64-NEXT:    retq # encoding: [0xc3]
847  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
848  ret <8 x i16> %res
849}
850
851define <8 x i16>@test_int_x86_avx512_maskz_pshufl_w_128(<8 x i16> %x0, i8 %x3) {
852; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
853; X86:       # %bb.0:
854; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
855; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
856; X86-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
857; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
858; X86-NEXT:    retl # encoding: [0xc3]
859;
860; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_128:
861; X64:       # %bb.0:
862; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
863; X64-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
864; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
865; X64-NEXT:    retq # encoding: [0xc3]
866  %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
867  ret <8 x i16> %res
868}
869
870declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16)
871
872define <16 x i16>@test_int_x86_avx512_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
873; CHECK-LABEL: test_int_x86_avx512_pshufl_w_256:
874; CHECK:       # %bb.0:
875; CHECK-NEXT:    vpshuflw $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xc0,0x03]
876; CHECK-NEXT:    # ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
877; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
878  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
879  ret <16 x i16> %res
880}
881
882define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
883; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
884; X86:       # %bb.0:
885; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
886; X86-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
887; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
888; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
889; X86-NEXT:    retl # encoding: [0xc3]
890;
891; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
892; X64:       # %bb.0:
893; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
894; X64-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
895; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
896; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
897; X64-NEXT:    retq # encoding: [0xc3]
898  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
899  ret <16 x i16> %res
900}
901
902define <16 x i16>@test_int_x86_avx512_maskz_pshufl_w_256(<16 x i16> %x0, i16 %x3) {
903; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
904; X86:       # %bb.0:
905; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
906; X86-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
907; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
908; X86-NEXT:    retl # encoding: [0xc3]
909;
910; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_256:
911; X64:       # %bb.0:
912; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
913; X64-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
914; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
915; X64-NEXT:    retq # encoding: [0xc3]
916  %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
917  ret <16 x i16> %res
918}
919
920define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
921; CHECK-LABEL: test_pcmpeq_b_256:
922; CHECK:       # %bb.0:
923; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
924; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
925; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
926; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
927  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
928  ret i32 %res
929}
930
931define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
932; X86-LABEL: test_mask_pcmpeq_b_256:
933; X86:       # %bb.0:
934; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
935; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
936; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
937; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
938; X86-NEXT:    retl # encoding: [0xc3]
939;
940; X64-LABEL: test_mask_pcmpeq_b_256:
941; X64:       # %bb.0:
942; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
943; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
944; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
945; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
946; X64-NEXT:    retq # encoding: [0xc3]
947  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
948  ret i32 %res
949}
950
951declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
952
953define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
954; CHECK-LABEL: test_pcmpeq_w_256:
955; CHECK:       # %bb.0:
956; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
957; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
958; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
959; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
960; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
961  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
962  ret i16 %res
963}
964
965define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
966; X86-LABEL: test_mask_pcmpeq_w_256:
967; X86:       # %bb.0:
968; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
969; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
970; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
971; X86-NEXT:    # kill: def $ax killed $ax killed $eax
972; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
973; X86-NEXT:    retl # encoding: [0xc3]
974;
975; X64-LABEL: test_mask_pcmpeq_w_256:
976; X64:       # %bb.0:
977; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
978; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
979; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
980; X64-NEXT:    # kill: def $ax killed $ax killed $eax
981; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
982; X64-NEXT:    retq # encoding: [0xc3]
983  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
984  ret i16 %res
985}
986
987declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
988
989define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
990; CHECK-LABEL: test_pcmpgt_b_256:
991; CHECK:       # %bb.0:
992; CHECK-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
993; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
994; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
995; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
996  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
997  ret i32 %res
998}
999
1000define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
1001; X86-LABEL: test_mask_pcmpgt_b_256:
1002; X86:       # %bb.0:
1003; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
1004; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1005; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
1006; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1007; X86-NEXT:    retl # encoding: [0xc3]
1008;
1009; X64-LABEL: test_mask_pcmpgt_b_256:
1010; X64:       # %bb.0:
1011; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
1012; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1013; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1014; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1015; X64-NEXT:    retq # encoding: [0xc3]
1016  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
1017  ret i32 %res
1018}
1019
1020declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
1021
1022define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
1023; CHECK-LABEL: test_pcmpgt_w_256:
1024; CHECK:       # %bb.0:
1025; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
1026; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1027; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
1028; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1029; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1030  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
1031  ret i16 %res
1032}
1033
1034define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1035; X86-LABEL: test_mask_pcmpgt_w_256:
1036; X86:       # %bb.0:
1037; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
1038; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1039; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1040; X86-NEXT:    # kill: def $ax killed $ax killed $eax
1041; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1042; X86-NEXT:    retl # encoding: [0xc3]
1043;
1044; X64-LABEL: test_mask_pcmpgt_w_256:
1045; X64:       # %bb.0:
1046; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
1047; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1048; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1049; X64-NEXT:    # kill: def $ax killed $ax killed $eax
1050; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
1051; X64-NEXT:    retq # encoding: [0xc3]
1052  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
1053  ret i16 %res
1054}
1055
1056declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
1057
1058define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
1059; CHECK-LABEL: test_pcmpeq_b_128:
1060; CHECK:       # %bb.0:
1061; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1062; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1063; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
1064; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1065  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
1066  ret i16 %res
1067}
1068
1069define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
1070; X86-LABEL: test_mask_pcmpeq_b_128:
1071; X86:       # %bb.0:
1072; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1073; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1074; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1075; X86-NEXT:    # kill: def $ax killed $ax killed $eax
1076; X86-NEXT:    retl # encoding: [0xc3]
1077;
1078; X64-LABEL: test_mask_pcmpeq_b_128:
1079; X64:       # %bb.0:
1080; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
1081; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1082; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1083; X64-NEXT:    # kill: def $ax killed $ax killed $eax
1084; X64-NEXT:    retq # encoding: [0xc3]
1085  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
1086  ret i16 %res
1087}
1088
1089declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
1090
1091define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
1092; CHECK-LABEL: test_pcmpeq_w_128:
1093; CHECK:       # %bb.0:
1094; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1095; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1096; CHECK-NEXT:    # kill: def $al killed $al killed $eax
1097; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1098  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
1099  ret i8 %res
1100}
1101
1102define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1103; X86-LABEL: test_mask_pcmpeq_w_128:
1104; X86:       # %bb.0:
1105; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1106; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1107; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1108; X86-NEXT:    # kill: def $al killed $al killed $eax
1109; X86-NEXT:    retl # encoding: [0xc3]
1110;
1111; X64-LABEL: test_mask_pcmpeq_w_128:
1112; X64:       # %bb.0:
1113; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1114; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1115; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
1116; X64-NEXT:    # kill: def $al killed $al killed $eax
1117; X64-NEXT:    retq # encoding: [0xc3]
1118  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
1119  ret i8 %res
1120}
1121
1122declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
1123
1124define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
1125; CHECK-LABEL: test_pcmpgt_b_128:
1126; CHECK:       # %bb.0:
1127; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1128; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1129; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
1130; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1131  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
1132  ret i16 %res
1133}
1134
1135define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
1136; X86-LABEL: test_mask_pcmpgt_b_128:
1137; X86:       # %bb.0:
1138; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1139; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1140; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
1141; X86-NEXT:    # kill: def $ax killed $ax killed $eax
1142; X86-NEXT:    retl # encoding: [0xc3]
1143;
1144; X64-LABEL: test_mask_pcmpgt_b_128:
1145; X64:       # %bb.0:
1146; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
1147; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1148; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
1149; X64-NEXT:    # kill: def $ax killed $ax killed $eax
1150; X64-NEXT:    retq # encoding: [0xc3]
1151  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
1152  ret i16 %res
1153}
1154
1155declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
1156
1157define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
1158; CHECK-LABEL: test_pcmpgt_w_128:
1159; CHECK:       # %bb.0:
1160; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1161; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1162; CHECK-NEXT:    # kill: def $al killed $al killed $eax
1163; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1164  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
1165  ret i8 %res
1166}
1167
1168define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1169; X86-LABEL: test_mask_pcmpgt_w_128:
1170; X86:       # %bb.0:
1171; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1172; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1173; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1174; X86-NEXT:    # kill: def $al killed $al killed $eax
1175; X86-NEXT:    retl # encoding: [0xc3]
1176;
1177; X64-LABEL: test_mask_pcmpgt_w_128:
1178; X64:       # %bb.0:
1179; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
1180; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1181; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
1182; X64-NEXT:    # kill: def $al killed $al killed $eax
1183; X64-NEXT:    retq # encoding: [0xc3]
1184  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
1185  ret i8 %res
1186}
1187
1188declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
1189
1190declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
1191
1192define <16 x i8>@test_int_x86_avx512_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
1193; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_128:
1194; CHECK:       # %bb.0:
1195; CHECK-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
1196; CHECK-NEXT:    # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1197; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1198  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
1199  ret <16 x i8> %res
1200}
1201
1202define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1203; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
1204; X86:       # %bb.0:
1205; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1206; X86-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
1207; X86-NEXT:    # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1208; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1209; X86-NEXT:    retl # encoding: [0xc3]
1210;
1211; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
1212; X64:       # %bb.0:
1213; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1214; X64-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
1215; X64-NEXT:    # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
1216; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1217; X64-NEXT:    retq # encoding: [0xc3]
1218  %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
1219  ret <16 x i8> %res
1220}
1221
1222declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
1223
1224define <16 x i8>@test_int_x86_avx512_ask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
1225; CHECK-LABEL: test_int_x86_avx512_ask_punpcklb_w_128:
1226; CHECK:       # %bb.0:
1227; CHECK-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
1228; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1229; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1230  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
1231  ret <16 x i8> %res
1232}
1233
1234define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1235; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
1236; X86:       # %bb.0:
1237; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1238; X86-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
1239; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1240; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1241; X86-NEXT:    retl # encoding: [0xc3]
1242;
1243; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
1244; X64:       # %bb.0:
1245; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1246; X64-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
1247; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1248; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1249; X64-NEXT:    retq # encoding: [0xc3]
1250  %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
1251  ret <16 x i8> %res
1252}
1253
1254declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
1255
1256define <32 x i8>@test_int_x86_avx512_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
1257; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_256:
1258; CHECK:       # %bb.0:
1259; CHECK-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xc1]
1260; CHECK-NEXT:    # ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1261; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1262  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
1263  ret <32 x i8> %res
1264}
1265
1266define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1267; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
1268; X86:       # %bb.0:
1269; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1270; X86-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
1271; X86-NEXT:    # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1272; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1273; X86-NEXT:    retl # encoding: [0xc3]
1274;
1275; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
1276; X64:       # %bb.0:
1277; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1278; X64-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
1279; X64-NEXT:    # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
1280; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1281; X64-NEXT:    retq # encoding: [0xc3]
1282  %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
1283  ret <32 x i8> %res
1284}
1285
1286declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
1287
1288define <32 x i8>@test_int_x86_avx512_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
1289; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_256:
1290; CHECK:       # %bb.0:
1291; CHECK-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xc1]
1292; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1293; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1294  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
1295  ret <32 x i8> %res
1296}
1297
1298define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1299; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
1300; X86:       # %bb.0:
1301; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1302; X86-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
1303; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1304; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1305; X86-NEXT:    retl # encoding: [0xc3]
1306;
1307; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
1308; X64:       # %bb.0:
1309; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1310; X64-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
1311; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
1312; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1313; X64-NEXT:    retq # encoding: [0xc3]
1314  %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
1315  ret <32 x i8> %res
1316}
1317
1318declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1319
1320define <8 x i16>@test_int_x86_avx512_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1321; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_128:
1322; CHECK:       # %bb.0:
1323; CHECK-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
1324; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1325; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1326  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1327  ret <8 x i16> %res
1328}
1329
1330define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1331; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
1332; X86:       # %bb.0:
1333; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1334; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1335; X86-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
1336; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1337; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1338; X86-NEXT:    retl # encoding: [0xc3]
1339;
1340; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
1341; X64:       # %bb.0:
1342; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1343; X64-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
1344; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1345; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1346; X64-NEXT:    retq # encoding: [0xc3]
1347  %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1348  ret <8 x i16> %res
1349}
1350
1351declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1352
1353define <8 x i16>@test_int_x86_avx512_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1354; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_128:
1355; CHECK:       # %bb.0:
1356; CHECK-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
1357; CHECK-NEXT:    # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1358; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1359  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
1360  ret <8 x i16> %res
1361}
1362
1363define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1364; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
1365; X86:       # %bb.0:
1366; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1367; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1368; X86-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
1369; X86-NEXT:    # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1370; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1371; X86-NEXT:    retl # encoding: [0xc3]
1372;
1373; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
1374; X64:       # %bb.0:
1375; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1376; X64-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
1377; X64-NEXT:    # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1378; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1379; X64-NEXT:    retq # encoding: [0xc3]
1380  %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1381  ret <8 x i16> %res
1382}
1383
1384declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1385
1386define <16 x i16>@test_int_x86_avx512_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1387; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_256:
1388; CHECK:       # %bb.0:
1389; CHECK-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xc1]
1390; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1391; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1392  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1393  ret <16 x i16> %res
1394}
1395
1396define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1397; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
1398; X86:       # %bb.0:
1399; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1400; X86-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
1401; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1402; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1403; X86-NEXT:    retl # encoding: [0xc3]
1404;
1405; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
1406; X64:       # %bb.0:
1407; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1408; X64-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
1409; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
1410; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1411; X64-NEXT:    retq # encoding: [0xc3]
1412  %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1413  ret <16 x i16> %res
1414}
1415
1416declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1417
1418define <16 x i16>@test_int_x86_avx512_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1419; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_256:
1420; CHECK:       # %bb.0:
1421; CHECK-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xc1]
1422; CHECK-NEXT:    # ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1423; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1424  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
1425  ret <16 x i16> %res
1426}
1427
1428define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1429; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
1430; X86:       # %bb.0:
1431; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1432; X86-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
1433; X86-NEXT:    # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1434; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1435; X86-NEXT:    retl # encoding: [0xc3]
1436;
1437; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
1438; X64:       # %bb.0:
1439; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1440; X64-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
1441; X64-NEXT:    # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
1442; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1443; X64-NEXT:    retq # encoding: [0xc3]
1444  %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1445  ret <16 x i16> %res
1446}
1447
1448define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1449; CHECK-LABEL: test_mask_add_epi16_rr_128:
1450; CHECK:       # %bb.0:
1451; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
1452; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1453  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1454  ret <8 x i16> %res
1455}
1456
1457define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1458; X86-LABEL: test_mask_add_epi16_rrk_128:
1459; X86:       # %bb.0:
1460; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1461; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1462; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1463; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1464; X86-NEXT:    retl # encoding: [0xc3]
1465;
1466; X64-LABEL: test_mask_add_epi16_rrk_128:
1467; X64:       # %bb.0:
1468; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1469; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
1470; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1471; X64-NEXT:    retq # encoding: [0xc3]
1472  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1473  ret <8 x i16> %res
1474}
1475
1476define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1477; X86-LABEL: test_mask_add_epi16_rrkz_128:
1478; X86:       # %bb.0:
1479; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1480; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1481; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1482; X86-NEXT:    retl # encoding: [0xc3]
1483;
1484; X64-LABEL: test_mask_add_epi16_rrkz_128:
1485; X64:       # %bb.0:
1486; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1487; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
1488; X64-NEXT:    retq # encoding: [0xc3]
1489  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1490  ret <8 x i16> %res
1491}
1492
1493define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1494; X86-LABEL: test_mask_add_epi16_rm_128:
1495; X86:       # %bb.0:
1496; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1497; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00]
1498; X86-NEXT:    retl # encoding: [0xc3]
1499;
1500; X64-LABEL: test_mask_add_epi16_rm_128:
1501; X64:       # %bb.0:
1502; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07]
1503; X64-NEXT:    retq # encoding: [0xc3]
1504  %b = load <8 x i16>, <8 x i16>* %ptr_b
1505  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1506  ret <8 x i16> %res
1507}
1508
1509define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1510; X86-LABEL: test_mask_add_epi16_rmk_128:
1511; X86:       # %bb.0:
1512; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1513; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1514; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1515; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08]
1516; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1517; X86-NEXT:    retl # encoding: [0xc3]
1518;
1519; X64-LABEL: test_mask_add_epi16_rmk_128:
1520; X64:       # %bb.0:
1521; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1522; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
1523; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1524; X64-NEXT:    retq # encoding: [0xc3]
1525  %b = load <8 x i16>, <8 x i16>* %ptr_b
1526  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1527  ret <8 x i16> %res
1528}
1529
1530define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1531; X86-LABEL: test_mask_add_epi16_rmkz_128:
1532; X86:       # %bb.0:
1533; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1534; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1535; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1536; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00]
1537; X86-NEXT:    retl # encoding: [0xc3]
1538;
1539; X64-LABEL: test_mask_add_epi16_rmkz_128:
1540; X64:       # %bb.0:
1541; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1542; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
1543; X64-NEXT:    retq # encoding: [0xc3]
1544  %b = load <8 x i16>, <8 x i16>* %ptr_b
1545  %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1546  ret <8 x i16> %res
1547}
1548
1549declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1550
1551define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1552; CHECK-LABEL: test_mask_add_epi16_rr_256:
1553; CHECK:       # %bb.0:
1554; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
1555; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1556  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1557  ret <16 x i16> %res
1558}
1559
1560define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1561; X86-LABEL: test_mask_add_epi16_rrk_256:
1562; X86:       # %bb.0:
1563; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1564; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1565; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1566; X86-NEXT:    retl # encoding: [0xc3]
1567;
1568; X64-LABEL: test_mask_add_epi16_rrk_256:
1569; X64:       # %bb.0:
1570; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1571; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
1572; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1573; X64-NEXT:    retq # encoding: [0xc3]
1574  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1575  ret <16 x i16> %res
1576}
1577
1578define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1579; X86-LABEL: test_mask_add_epi16_rrkz_256:
1580; X86:       # %bb.0:
1581; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1582; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1583; X86-NEXT:    retl # encoding: [0xc3]
1584;
1585; X64-LABEL: test_mask_add_epi16_rrkz_256:
1586; X64:       # %bb.0:
1587; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1588; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
1589; X64-NEXT:    retq # encoding: [0xc3]
1590  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1591  ret <16 x i16> %res
1592}
1593
1594define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1595; X86-LABEL: test_mask_add_epi16_rm_256:
1596; X86:       # %bb.0:
1597; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1598; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00]
1599; X86-NEXT:    retl # encoding: [0xc3]
1600;
1601; X64-LABEL: test_mask_add_epi16_rm_256:
1602; X64:       # %bb.0:
1603; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07]
1604; X64-NEXT:    retq # encoding: [0xc3]
1605  %b = load <16 x i16>, <16 x i16>* %ptr_b
1606  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1607  ret <16 x i16> %res
1608}
1609
1610define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1611; X86-LABEL: test_mask_add_epi16_rmk_256:
1612; X86:       # %bb.0:
1613; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1614; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1615; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08]
1616; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1617; X86-NEXT:    retl # encoding: [0xc3]
1618;
1619; X64-LABEL: test_mask_add_epi16_rmk_256:
1620; X64:       # %bb.0:
1621; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1622; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
1623; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1624; X64-NEXT:    retq # encoding: [0xc3]
1625  %b = load <16 x i16>, <16 x i16>* %ptr_b
1626  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1627  ret <16 x i16> %res
1628}
1629
1630define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1631; X86-LABEL: test_mask_add_epi16_rmkz_256:
1632; X86:       # %bb.0:
1633; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1634; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1635; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00]
1636; X86-NEXT:    retl # encoding: [0xc3]
1637;
1638; X64-LABEL: test_mask_add_epi16_rmkz_256:
1639; X64:       # %bb.0:
1640; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1641; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
1642; X64-NEXT:    retq # encoding: [0xc3]
1643  %b = load <16 x i16>, <16 x i16>* %ptr_b
1644  %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1645  ret <16 x i16> %res
1646}
1647
1648declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1649
1650define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
1651; CHECK-LABEL: test_mask_sub_epi16_rr_128:
1652; CHECK:       # %bb.0:
1653; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
1654; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1655  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1656  ret <8 x i16> %res
1657}
1658
1659define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
1660; X86-LABEL: test_mask_sub_epi16_rrk_128:
1661; X86:       # %bb.0:
1662; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1663; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1664; X86-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1665; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1666; X86-NEXT:    retl # encoding: [0xc3]
1667;
1668; X64-LABEL: test_mask_sub_epi16_rrk_128:
1669; X64:       # %bb.0:
1670; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1671; X64-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
1672; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1673; X64-NEXT:    retq # encoding: [0xc3]
1674  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1675  ret <8 x i16> %res
1676}
1677
1678define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
1679; X86-LABEL: test_mask_sub_epi16_rrkz_128:
1680; X86:       # %bb.0:
1681; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1682; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1683; X86-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1684; X86-NEXT:    retl # encoding: [0xc3]
1685;
1686; X64-LABEL: test_mask_sub_epi16_rrkz_128:
1687; X64:       # %bb.0:
1688; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1689; X64-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
1690; X64-NEXT:    retq # encoding: [0xc3]
1691  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1692  ret <8 x i16> %res
1693}
1694
1695define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
1696; X86-LABEL: test_mask_sub_epi16_rm_128:
1697; X86:       # %bb.0:
1698; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1699; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00]
1700; X86-NEXT:    retl # encoding: [0xc3]
1701;
1702; X64-LABEL: test_mask_sub_epi16_rm_128:
1703; X64:       # %bb.0:
1704; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07]
1705; X64-NEXT:    retq # encoding: [0xc3]
1706  %b = load <8 x i16>, <8 x i16>* %ptr_b
1707  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
1708  ret <8 x i16> %res
1709}
1710
1711define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
1712; X86-LABEL: test_mask_sub_epi16_rmk_128:
1713; X86:       # %bb.0:
1714; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1715; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1716; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1717; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08]
1718; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1719; X86-NEXT:    retl # encoding: [0xc3]
1720;
1721; X64-LABEL: test_mask_sub_epi16_rmk_128:
1722; X64:       # %bb.0:
1723; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1724; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
1725; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1726; X64-NEXT:    retq # encoding: [0xc3]
1727  %b = load <8 x i16>, <8 x i16>* %ptr_b
1728  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
1729  ret <8 x i16> %res
1730}
1731
1732define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
1733; X86-LABEL: test_mask_sub_epi16_rmkz_128:
1734; X86:       # %bb.0:
1735; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1736; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1737; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1738; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00]
1739; X86-NEXT:    retl # encoding: [0xc3]
1740;
1741; X64-LABEL: test_mask_sub_epi16_rmkz_128:
1742; X64:       # %bb.0:
1743; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1744; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
1745; X64-NEXT:    retq # encoding: [0xc3]
1746  %b = load <8 x i16>, <8 x i16>* %ptr_b
1747  %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
1748  ret <8 x i16> %res
1749}
1750
1751declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1752
1753define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
1754; CHECK-LABEL: test_mask_sub_epi16_rr_256:
1755; CHECK:       # %bb.0:
1756; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
1757; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1758  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1759  ret <16 x i16> %res
1760}
1761
1762define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
1763; X86-LABEL: test_mask_sub_epi16_rrk_256:
1764; X86:       # %bb.0:
1765; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1766; X86-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1767; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1768; X86-NEXT:    retl # encoding: [0xc3]
1769;
1770; X64-LABEL: test_mask_sub_epi16_rrk_256:
1771; X64:       # %bb.0:
1772; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1773; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
1774; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1775; X64-NEXT:    retq # encoding: [0xc3]
1776  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1777  ret <16 x i16> %res
1778}
1779
1780define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
1781; X86-LABEL: test_mask_sub_epi16_rrkz_256:
1782; X86:       # %bb.0:
1783; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1784; X86-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1785; X86-NEXT:    retl # encoding: [0xc3]
1786;
1787; X64-LABEL: test_mask_sub_epi16_rrkz_256:
1788; X64:       # %bb.0:
1789; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1790; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
1791; X64-NEXT:    retq # encoding: [0xc3]
1792  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1793  ret <16 x i16> %res
1794}
1795
1796define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
1797; X86-LABEL: test_mask_sub_epi16_rm_256:
1798; X86:       # %bb.0:
1799; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1800; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x00]
1801; X86-NEXT:    retl # encoding: [0xc3]
1802;
1803; X64-LABEL: test_mask_sub_epi16_rm_256:
1804; X64:       # %bb.0:
1805; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07]
1806; X64-NEXT:    retq # encoding: [0xc3]
1807  %b = load <16 x i16>, <16 x i16>* %ptr_b
1808  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
1809  ret <16 x i16> %res
1810}
1811
1812define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
1813; X86-LABEL: test_mask_sub_epi16_rmk_256:
1814; X86:       # %bb.0:
1815; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1816; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1817; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08]
1818; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1819; X86-NEXT:    retl # encoding: [0xc3]
1820;
1821; X64-LABEL: test_mask_sub_epi16_rmk_256:
1822; X64:       # %bb.0:
1823; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1824; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
1825; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1826; X64-NEXT:    retq # encoding: [0xc3]
1827  %b = load <16 x i16>, <16 x i16>* %ptr_b
1828  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
1829  ret <16 x i16> %res
1830}
1831
1832define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
1833; X86-LABEL: test_mask_sub_epi16_rmkz_256:
1834; X86:       # %bb.0:
1835; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1836; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1837; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00]
1838; X86-NEXT:    retl # encoding: [0xc3]
1839;
1840; X64-LABEL: test_mask_sub_epi16_rmkz_256:
1841; X64:       # %bb.0:
1842; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1843; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
1844; X64-NEXT:    retq # encoding: [0xc3]
1845  %b = load <16 x i16>, <16 x i16>* %ptr_b
1846  %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
1847  ret <16 x i16> %res
1848}
1849
1850declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1851
1852define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1853; CHECK-LABEL: test_mask_add_epi16_rr_512:
1854; CHECK:       # %bb.0:
1855; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
1856; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1857  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1858  ret <32 x i16> %res
1859}
1860
1861define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1862; X86-LABEL: test_mask_add_epi16_rrk_512:
1863; X86:       # %bb.0:
1864; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1865; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1866; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1867; X86-NEXT:    retl # encoding: [0xc3]
1868;
1869; X64-LABEL: test_mask_add_epi16_rrk_512:
1870; X64:       # %bb.0:
1871; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1872; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
1873; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1874; X64-NEXT:    retq # encoding: [0xc3]
1875  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1876  ret <32 x i16> %res
1877}
1878
1879define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1880; X86-LABEL: test_mask_add_epi16_rrkz_512:
1881; X86:       # %bb.0:
1882; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1883; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1884; X86-NEXT:    retl # encoding: [0xc3]
1885;
1886; X64-LABEL: test_mask_add_epi16_rrkz_512:
1887; X64:       # %bb.0:
1888; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1889; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
1890; X64-NEXT:    retq # encoding: [0xc3]
1891  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1892  ret <32 x i16> %res
1893}
1894
1895define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1896; X86-LABEL: test_mask_add_epi16_rm_512:
1897; X86:       # %bb.0:
1898; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1899; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00]
1900; X86-NEXT:    retl # encoding: [0xc3]
1901;
1902; X64-LABEL: test_mask_add_epi16_rm_512:
1903; X64:       # %bb.0:
1904; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
1905; X64-NEXT:    retq # encoding: [0xc3]
1906  %b = load <32 x i16>, <32 x i16>* %ptr_b
1907  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1908  ret <32 x i16> %res
1909}
1910
1911define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1912; X86-LABEL: test_mask_add_epi16_rmk_512:
1913; X86:       # %bb.0:
1914; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1915; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1916; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08]
1917; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1918; X86-NEXT:    retl # encoding: [0xc3]
1919;
1920; X64-LABEL: test_mask_add_epi16_rmk_512:
1921; X64:       # %bb.0:
1922; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1923; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
1924; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1925; X64-NEXT:    retq # encoding: [0xc3]
1926  %b = load <32 x i16>, <32 x i16>* %ptr_b
1927  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1928  ret <32 x i16> %res
1929}
1930
1931define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1932; X86-LABEL: test_mask_add_epi16_rmkz_512:
1933; X86:       # %bb.0:
1934; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1935; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1936; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00]
1937; X86-NEXT:    retl # encoding: [0xc3]
1938;
1939; X64-LABEL: test_mask_add_epi16_rmkz_512:
1940; X64:       # %bb.0:
1941; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1942; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
1943; X64-NEXT:    retq # encoding: [0xc3]
1944  %b = load <32 x i16>, <32 x i16>* %ptr_b
1945  %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1946  ret <32 x i16> %res
1947}
1948
1949declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1950
1951define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1952; CHECK-LABEL: test_mask_sub_epi16_rr_512:
1953; CHECK:       # %bb.0:
1954; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
1955; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1956  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1957  ret <32 x i16> %res
1958}
1959
1960define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1961; X86-LABEL: test_mask_sub_epi16_rrk_512:
1962; X86:       # %bb.0:
1963; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1964; X86-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1965; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1966; X86-NEXT:    retl # encoding: [0xc3]
1967;
1968; X64-LABEL: test_mask_sub_epi16_rrk_512:
1969; X64:       # %bb.0:
1970; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1971; X64-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
1972; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1973; X64-NEXT:    retq # encoding: [0xc3]
1974  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1975  ret <32 x i16> %res
1976}
1977
1978define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1979; X86-LABEL: test_mask_sub_epi16_rrkz_512:
1980; X86:       # %bb.0:
1981; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1982; X86-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1983; X86-NEXT:    retl # encoding: [0xc3]
1984;
1985; X64-LABEL: test_mask_sub_epi16_rrkz_512:
1986; X64:       # %bb.0:
1987; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1988; X64-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
1989; X64-NEXT:    retq # encoding: [0xc3]
1990  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1991  ret <32 x i16> %res
1992}
1993
1994define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1995; X86-LABEL: test_mask_sub_epi16_rm_512:
1996; X86:       # %bb.0:
1997; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1998; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00]
1999; X86-NEXT:    retl # encoding: [0xc3]
2000;
2001; X64-LABEL: test_mask_sub_epi16_rm_512:
2002; X64:       # %bb.0:
2003; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
2004; X64-NEXT:    retq # encoding: [0xc3]
2005  %b = load <32 x i16>, <32 x i16>* %ptr_b
2006  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2007  ret <32 x i16> %res
2008}
2009
2010define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
2011; X86-LABEL: test_mask_sub_epi16_rmk_512:
2012; X86:       # %bb.0:
2013; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2014; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2015; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08]
2016; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2017; X86-NEXT:    retl # encoding: [0xc3]
2018;
2019; X64-LABEL: test_mask_sub_epi16_rmk_512:
2020; X64:       # %bb.0:
2021; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2022; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
2023; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2024; X64-NEXT:    retq # encoding: [0xc3]
2025  %b = load <32 x i16>, <32 x i16>* %ptr_b
2026  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2027  ret <32 x i16> %res
2028}
2029
2030define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
2031; X86-LABEL: test_mask_sub_epi16_rmkz_512:
2032; X86:       # %bb.0:
2033; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2034; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2035; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00]
2036; X86-NEXT:    retl # encoding: [0xc3]
2037;
2038; X64-LABEL: test_mask_sub_epi16_rmkz_512:
2039; X64:       # %bb.0:
2040; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2041; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
2042; X64-NEXT:    retq # encoding: [0xc3]
2043  %b = load <32 x i16>, <32 x i16>* %ptr_b
2044  %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2045  ret <32 x i16> %res
2046}
2047
2048declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2049
2050define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
2051; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
2052; CHECK:       # %bb.0:
2053; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
2054; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2055  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2056  ret <32 x i16> %res
2057}
2058
2059define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
2060; X86-LABEL: test_mask_mullo_epi16_rrk_512:
2061; X86:       # %bb.0:
2062; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2063; X86-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
2064; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2065; X86-NEXT:    retl # encoding: [0xc3]
2066;
2067; X64-LABEL: test_mask_mullo_epi16_rrk_512:
2068; X64:       # %bb.0:
2069; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2070; X64-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
2071; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2072; X64-NEXT:    retq # encoding: [0xc3]
2073  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2074  ret <32 x i16> %res
2075}
2076
2077define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
2078; X86-LABEL: test_mask_mullo_epi16_rrkz_512:
2079; X86:       # %bb.0:
2080; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2081; X86-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
2082; X86-NEXT:    retl # encoding: [0xc3]
2083;
2084; X64-LABEL: test_mask_mullo_epi16_rrkz_512:
2085; X64:       # %bb.0:
2086; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2087; X64-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
2088; X64-NEXT:    retq # encoding: [0xc3]
2089  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2090  ret <32 x i16> %res
2091}
2092
2093define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
2094; X86-LABEL: test_mask_mullo_epi16_rm_512:
2095; X86:       # %bb.0:
2096; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2097; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00]
2098; X86-NEXT:    retl # encoding: [0xc3]
2099;
2100; X64-LABEL: test_mask_mullo_epi16_rm_512:
2101; X64:       # %bb.0:
2102; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
2103; X64-NEXT:    retq # encoding: [0xc3]
2104  %b = load <32 x i16>, <32 x i16>* %ptr_b
2105  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2106  ret <32 x i16> %res
2107}
2108
2109define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
2110; X86-LABEL: test_mask_mullo_epi16_rmk_512:
2111; X86:       # %bb.0:
2112; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2113; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2114; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08]
2115; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2116; X86-NEXT:    retl # encoding: [0xc3]
2117;
2118; X64-LABEL: test_mask_mullo_epi16_rmk_512:
2119; X64:       # %bb.0:
2120; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2121; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
2122; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2123; X64-NEXT:    retq # encoding: [0xc3]
2124  %b = load <32 x i16>, <32 x i16>* %ptr_b
2125  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2126  ret <32 x i16> %res
2127}
2128
2129define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
2130; X86-LABEL: test_mask_mullo_epi16_rmkz_512:
2131; X86:       # %bb.0:
2132; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2133; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2134; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00]
2135; X86-NEXT:    retl # encoding: [0xc3]
2136;
2137; X64-LABEL: test_mask_mullo_epi16_rmkz_512:
2138; X64:       # %bb.0:
2139; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2140; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
2141; X64-NEXT:    retq # encoding: [0xc3]
2142  %b = load <32 x i16>, <32 x i16>* %ptr_b
2143  %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2144  ret <32 x i16> %res
2145}
2146
2147declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2148
2149define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
2150; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
2151; CHECK:       # %bb.0:
2152; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
2153; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2154  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2155  ret <8 x i16> %res
2156}
2157
2158define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
2159; X86-LABEL: test_mask_mullo_epi16_rrk_128:
2160; X86:       # %bb.0:
2161; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2162; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2163; X86-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2164; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2165; X86-NEXT:    retl # encoding: [0xc3]
2166;
2167; X64-LABEL: test_mask_mullo_epi16_rrk_128:
2168; X64:       # %bb.0:
2169; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2170; X64-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
2171; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2172; X64-NEXT:    retq # encoding: [0xc3]
2173  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2174  ret <8 x i16> %res
2175}
2176
2177define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
2178; X86-LABEL: test_mask_mullo_epi16_rrkz_128:
2179; X86:       # %bb.0:
2180; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2181; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2182; X86-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2183; X86-NEXT:    retl # encoding: [0xc3]
2184;
2185; X64-LABEL: test_mask_mullo_epi16_rrkz_128:
2186; X64:       # %bb.0:
2187; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2188; X64-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
2189; X64-NEXT:    retq # encoding: [0xc3]
2190  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2191  ret <8 x i16> %res
2192}
2193
2194define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
2195; X86-LABEL: test_mask_mullo_epi16_rm_128:
2196; X86:       # %bb.0:
2197; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2198; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00]
2199; X86-NEXT:    retl # encoding: [0xc3]
2200;
2201; X64-LABEL: test_mask_mullo_epi16_rm_128:
2202; X64:       # %bb.0:
2203; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07]
2204; X64-NEXT:    retq # encoding: [0xc3]
2205  %b = load <8 x i16>, <8 x i16>* %ptr_b
2206  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
2207  ret <8 x i16> %res
2208}
2209
2210define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
2211; X86-LABEL: test_mask_mullo_epi16_rmk_128:
2212; X86:       # %bb.0:
2213; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2214; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2215; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2216; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08]
2217; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2218; X86-NEXT:    retl # encoding: [0xc3]
2219;
2220; X64-LABEL: test_mask_mullo_epi16_rmk_128:
2221; X64:       # %bb.0:
2222; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2223; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
2224; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2225; X64-NEXT:    retq # encoding: [0xc3]
2226  %b = load <8 x i16>, <8 x i16>* %ptr_b
2227  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
2228  ret <8 x i16> %res
2229}
2230
2231define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
2232; X86-LABEL: test_mask_mullo_epi16_rmkz_128:
2233; X86:       # %bb.0:
2234; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2235; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2236; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2237; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00]
2238; X86-NEXT:    retl # encoding: [0xc3]
2239;
2240; X64-LABEL: test_mask_mullo_epi16_rmkz_128:
2241; X64:       # %bb.0:
2242; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2243; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
2244; X64-NEXT:    retq # encoding: [0xc3]
2245  %b = load <8 x i16>, <8 x i16>* %ptr_b
2246  %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
2247  ret <8 x i16> %res
2248}
2249
2250declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2251
2252define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
2253; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
2254; CHECK:       # %bb.0:
2255; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1]
2256; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2257  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2258  ret <16 x i16> %res
2259}
2260
2261define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
2262; X86-LABEL: test_mask_mullo_epi16_rrk_256:
2263; X86:       # %bb.0:
2264; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2265; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2266; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2267; X86-NEXT:    retl # encoding: [0xc3]
2268;
2269; X64-LABEL: test_mask_mullo_epi16_rrk_256:
2270; X64:       # %bb.0:
2271; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2272; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
2273; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2274; X64-NEXT:    retq # encoding: [0xc3]
2275  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2276  ret <16 x i16> %res
2277}
2278
2279define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
2280; X86-LABEL: test_mask_mullo_epi16_rrkz_256:
2281; X86:       # %bb.0:
2282; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2283; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2284; X86-NEXT:    retl # encoding: [0xc3]
2285;
2286; X64-LABEL: test_mask_mullo_epi16_rrkz_256:
2287; X64:       # %bb.0:
2288; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2289; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
2290; X64-NEXT:    retq # encoding: [0xc3]
2291  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2292  ret <16 x i16> %res
2293}
2294
2295define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
2296; X86-LABEL: test_mask_mullo_epi16_rm_256:
2297; X86:       # %bb.0:
2298; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2299; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00]
2300; X86-NEXT:    retl # encoding: [0xc3]
2301;
2302; X64-LABEL: test_mask_mullo_epi16_rm_256:
2303; X64:       # %bb.0:
2304; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07]
2305; X64-NEXT:    retq # encoding: [0xc3]
2306  %b = load <16 x i16>, <16 x i16>* %ptr_b
2307  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
2308  ret <16 x i16> %res
2309}
2310
2311define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
2312; X86-LABEL: test_mask_mullo_epi16_rmk_256:
2313; X86:       # %bb.0:
2314; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2315; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2316; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08]
2317; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2318; X86-NEXT:    retl # encoding: [0xc3]
2319;
2320; X64-LABEL: test_mask_mullo_epi16_rmk_256:
2321; X64:       # %bb.0:
2322; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2323; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
2324; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2325; X64-NEXT:    retq # encoding: [0xc3]
2326  %b = load <16 x i16>, <16 x i16>* %ptr_b
2327  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
2328  ret <16 x i16> %res
2329}
2330
2331define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
2332; X86-LABEL: test_mask_mullo_epi16_rmkz_256:
2333; X86:       # %bb.0:
2334; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2335; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2336; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00]
2337; X86-NEXT:    retl # encoding: [0xc3]
2338;
2339; X64-LABEL: test_mask_mullo_epi16_rmkz_256:
2340; X64:       # %bb.0:
2341; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2342; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
2343; X64-NEXT:    retq # encoding: [0xc3]
2344  %b = load <16 x i16>, <16 x i16>* %ptr_b
2345  %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
2346  ret <16 x i16> %res
2347}
2348
2349declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2350
2351declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2352
2353define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2354; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
2355; X86:       # %bb.0:
2356; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2357; X86-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
2358; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2359; X86-NEXT:    retl # encoding: [0xc3]
2360;
2361; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
2362; X64:       # %bb.0:
2363; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2364; X64-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
2365; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2366; X64-NEXT:    retq # encoding: [0xc3]
2367  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask)
2368  ret <16 x i8> %res
2369}
2370
2371define <16 x i8>@test_int_x86_avx512_maskz_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2372; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128:
2373; X86:       # %bb.0:
2374; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2375; X86-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
2376; X86-NEXT:    retl # encoding: [0xc3]
2377;
2378; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_b_128:
2379; X64:       # %bb.0:
2380; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2381; X64-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
2382; X64-NEXT:    retq # encoding: [0xc3]
2383  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2384  ret <16 x i8> %res
2385}
2386
2387declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2388
2389define <32 x i8>@test_int_x86_avx512_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2390; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_256:
2391; CHECK:       # %bb.0:
2392; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
2393; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2394  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2395  ret <32 x i8> %res
2396}
2397
2398define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2399; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
2400; X86:       # %bb.0:
2401; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2402; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
2403; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2404; X86-NEXT:    retl # encoding: [0xc3]
2405;
2406; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
2407; X64:       # %bb.0:
2408; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2409; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
2410; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2411; X64-NEXT:    retq # encoding: [0xc3]
2412  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2413  ret <32 x i8> %res
2414}
2415
2416declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2417
2418define <8 x i16>@test_int_x86_avx512_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2419; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_128:
2420; CHECK:       # %bb.0:
2421; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
2422; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2423  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2424  ret <8 x i16> %res
2425}
2426
2427define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2428; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
2429; X86:       # %bb.0:
2430; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2431; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2432; X86-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
2433; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2434; X86-NEXT:    retl # encoding: [0xc3]
2435;
2436; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
2437; X64:       # %bb.0:
2438; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2439; X64-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
2440; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2441; X64-NEXT:    retq # encoding: [0xc3]
2442  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2443  ret <8 x i16> %res
2444}
2445
2446declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2447
2448define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2449; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
2450; X86:       # %bb.0:
2451; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2452; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
2453; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2454; X86-NEXT:    retl # encoding: [0xc3]
2455;
2456; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
2457; X64:       # %bb.0:
2458; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2459; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
2460; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2461; X64-NEXT:    retq # encoding: [0xc3]
2462  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2463  ret <16 x i16> %res
2464}
2465
2466define <16 x i16>@test_int_x86_avx512_maskz_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2467; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256:
2468; X86:       # %bb.0:
2469; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2470; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
2471; X86-NEXT:    retl # encoding: [0xc3]
2472;
2473; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_w_256:
2474; X64:       # %bb.0:
2475; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2476; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
2477; X64-NEXT:    retq # encoding: [0xc3]
2478  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2479  ret <16 x i16> %res
2480}
2481
2482declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2483
2484define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) {
2485; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
2486; X86:       # %bb.0:
2487; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2488; X86-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
2489; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2490; X86-NEXT:    retl # encoding: [0xc3]
2491;
2492; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
2493; X64:       # %bb.0:
2494; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2495; X64-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
2496; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2497; X64-NEXT:    retq # encoding: [0xc3]
2498  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2499  ret <16 x i8> %res
2500}
2501
2502define <16 x i8>@test_int_x86_avx512_maskz_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2503; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128:
2504; X86:       # %bb.0:
2505; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2506; X86-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
2507; X86-NEXT:    retl # encoding: [0xc3]
2508;
2509; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_b_128:
2510; X64:       # %bb.0:
2511; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2512; X64-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
2513; X64-NEXT:    retq # encoding: [0xc3]
2514  %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2515  ret <16 x i8> %res
2516}
2517
2518declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2519
2520define <32 x i8>@test_int_x86_avx512_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2521; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_256:
2522; CHECK:       # %bb.0:
2523; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
2524; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2525  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2526  ret <32 x i8> %res
2527}
2528
2529define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2530; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
2531; X86:       # %bb.0:
2532; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2533; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
2534; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2535; X86-NEXT:    retl # encoding: [0xc3]
2536;
2537; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
2538; X64:       # %bb.0:
2539; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2540; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
2541; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2542; X64-NEXT:    retq # encoding: [0xc3]
2543  %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2544  ret <32 x i8> %res
2545}
2546
2547declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2548
2549define <8 x i16>@test_int_x86_avx512_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2550; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_128:
2551; CHECK:       # %bb.0:
2552; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
2553; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2554  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2555  ret <8 x i16> %res
2556}
2557
2558define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2559; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
2560; X86:       # %bb.0:
2561; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2562; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2563; X86-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
2564; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2565; X86-NEXT:    retl # encoding: [0xc3]
2566;
2567; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
2568; X64:       # %bb.0:
2569; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2570; X64-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
2571; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2572; X64-NEXT:    retq # encoding: [0xc3]
2573  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2574  ret <8 x i16> %res
2575}
2576
2577declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2578
2579define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2580; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
2581; X86:       # %bb.0:
2582; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2583; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
2584; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2585; X86-NEXT:    retl # encoding: [0xc3]
2586;
2587; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
2588; X64:       # %bb.0:
2589; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2590; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
2591; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2592; X64-NEXT:    retq # encoding: [0xc3]
2593  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2594  ret <16 x i16> %res
2595}
2596
2597define <16 x i16>@test_int_x86_avx512_maskz_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2598; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256:
2599; X86:       # %bb.0:
2600; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2601; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
2602; X86-NEXT:    retl # encoding: [0xc3]
2603;
2604; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_w_256:
2605; X64:       # %bb.0:
2606; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2607; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
2608; X64-NEXT:    retq # encoding: [0xc3]
2609  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2610  ret <16 x i16> %res
2611}
2612
2613declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2614
2615define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2616; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128:
2617; X86:       # %bb.0:
2618; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2619; X86-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
2620; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2621; X86-NEXT:    retl # encoding: [0xc3]
2622;
2623; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128:
2624; X64:       # %bb.0:
2625; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2626; X64-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
2627; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2628; X64-NEXT:    retq # encoding: [0xc3]
2629  %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2630  ret <16 x i8> %res
2631}
2632
2633define <16 x i8>@test_int_x86_avx512_maskz_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2634; X86-LABEL: test_int_x86_avx512_maskz_pmins_b_128:
2635; X86:       # %bb.0:
2636; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2637; X86-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
2638; X86-NEXT:    retl # encoding: [0xc3]
2639;
2640; X64-LABEL: test_int_x86_avx512_maskz_pmins_b_128:
2641; X64:       # %bb.0:
2642; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2643; X64-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
2644; X64-NEXT:    retq # encoding: [0xc3]
2645  %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2646  ret <16 x i8> %res
2647}
2648
2649declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2650
2651define <32 x i8>@test_int_x86_avx512_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2652; CHECK-LABEL: test_int_x86_avx512_pmins_b_256:
2653; CHECK:       # %bb.0:
2654; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
2655; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2656  %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2657  ret <32 x i8> %res
2658}
2659
2660define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2661; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256:
2662; X86:       # %bb.0:
2663; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2664; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
2665; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2666; X86-NEXT:    retl # encoding: [0xc3]
2667;
2668; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256:
2669; X64:       # %bb.0:
2670; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2671; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
2672; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2673; X64-NEXT:    retq # encoding: [0xc3]
2674  %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2675  ret <32 x i8> %res
2676}
2677
2678declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2679
2680define <8 x i16>@test_int_x86_avx512_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2681; CHECK-LABEL: test_int_x86_avx512_pmins_w_128:
2682; CHECK:       # %bb.0:
2683; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
2684; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2685  %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2686  ret <8 x i16> %res
2687}
2688
2689define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2690; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128:
2691; X86:       # %bb.0:
2692; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2693; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2694; X86-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
2695; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2696; X86-NEXT:    retl # encoding: [0xc3]
2697;
2698; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128:
2699; X64:       # %bb.0:
2700; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2701; X64-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
2702; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2703; X64-NEXT:    retq # encoding: [0xc3]
2704  %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2705  ret <8 x i16> %res
2706}
2707
2708declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2709
2710define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2711; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256:
2712; X86:       # %bb.0:
2713; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2714; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
2715; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2716; X86-NEXT:    retl # encoding: [0xc3]
2717;
2718; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256:
2719; X64:       # %bb.0:
2720; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2721; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
2722; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2723; X64-NEXT:    retq # encoding: [0xc3]
2724  %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2725  ret <16 x i16> %res
2726}
2727
2728define <16 x i16>@test_int_x86_avx512_maskz_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2729; X86-LABEL: test_int_x86_avx512_maskz_pmins_w_256:
2730; X86:       # %bb.0:
2731; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2732; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
2733; X86-NEXT:    retl # encoding: [0xc3]
2734;
2735; X64-LABEL: test_int_x86_avx512_maskz_pmins_w_256:
2736; X64:       # %bb.0:
2737; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2738; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
2739; X64-NEXT:    retq # encoding: [0xc3]
2740  %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2741  ret <16 x i16> %res
2742}
2743
2744declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
2745
2746define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
2747; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128:
2748; X86:       # %bb.0:
2749; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2750; X86-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
2751; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2752; X86-NEXT:    retl # encoding: [0xc3]
2753;
2754; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128:
2755; X64:       # %bb.0:
2756; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2757; X64-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
2758; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2759; X64-NEXT:    retq # encoding: [0xc3]
2760  %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
2761  ret <16 x i8> %res
2762}
2763
2764define <16 x i8>@test_int_x86_avx512_maskz_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
2765; X86-LABEL: test_int_x86_avx512_maskz_pminu_b_128:
2766; X86:       # %bb.0:
2767; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2768; X86-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
2769; X86-NEXT:    retl # encoding: [0xc3]
2770;
2771; X64-LABEL: test_int_x86_avx512_maskz_pminu_b_128:
2772; X64:       # %bb.0:
2773; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2774; X64-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
2775; X64-NEXT:    retq # encoding: [0xc3]
2776  %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
2777  ret <16 x i8> %res
2778}
2779
2780declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
2781
2782define <32 x i8>@test_int_x86_avx512_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
2783; CHECK-LABEL: test_int_x86_avx512_pminu_b_256:
2784; CHECK:       # %bb.0:
2785; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
2786; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2787  %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
2788  ret <32 x i8> %res
2789}
2790
2791define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
2792; X86-LABEL: test_int_x86_avx512_mask_pminu_b_256:
2793; X86:       # %bb.0:
2794; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2795; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
2796; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2797; X86-NEXT:    retl # encoding: [0xc3]
2798;
2799; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256:
2800; X64:       # %bb.0:
2801; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2802; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
2803; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2804; X64-NEXT:    retq # encoding: [0xc3]
2805  %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
2806  ret <32 x i8> %res
2807}
2808
2809declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2810
2811define <8 x i16>@test_int_x86_avx512_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2812; CHECK-LABEL: test_int_x86_avx512_pminu_w_128:
2813; CHECK:       # %bb.0:
2814; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
2815; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2816  %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2817  ret <8 x i16> %res
2818}
2819
2820define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2821; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128:
2822; X86:       # %bb.0:
2823; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2824; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2825; X86-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
2826; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2827; X86-NEXT:    retl # encoding: [0xc3]
2828;
2829; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128:
2830; X64:       # %bb.0:
2831; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2832; X64-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
2833; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2834; X64-NEXT:    retq # encoding: [0xc3]
2835  %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2836  ret <8 x i16> %res
2837}
2838
2839declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2840
2841define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
2842; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256:
2843; X86:       # %bb.0:
2844; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2845; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
2846; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2847; X86-NEXT:    retl # encoding: [0xc3]
2848;
2849; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256:
2850; X64:       # %bb.0:
2851; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2852; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
2853; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2854; X64-NEXT:    retq # encoding: [0xc3]
2855  %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
2856  ret <16 x i16> %res
2857}
2858
2859define <16 x i16>@test_int_x86_avx512_maskz_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %mask) {
2860; X86-LABEL: test_int_x86_avx512_maskz_pminu_w_256:
2861; X86:       # %bb.0:
2862; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2863; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
2864; X86-NEXT:    retl # encoding: [0xc3]
2865;
2866; X64-LABEL: test_int_x86_avx512_maskz_pminu_w_256:
2867; X64:       # %bb.0:
2868; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2869; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
2870; X64-NEXT:    retq # encoding: [0xc3]
2871  %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
2872  ret <16 x i16> %res
2873}
2874
2875declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2876
2877define <8 x i16>@test_int_x86_avx512_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2878; CHECK-LABEL: test_int_x86_avx512_psrl_w_128:
2879; CHECK:       # %bb.0:
2880; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
2881; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2882  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2883  ret <8 x i16> %res
2884}
2885
2886define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2887; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128:
2888; X86:       # %bb.0:
2889; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2890; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2891; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
2892; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2893; X86-NEXT:    retl # encoding: [0xc3]
2894;
2895; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128:
2896; X64:       # %bb.0:
2897; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2898; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
2899; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2900; X64-NEXT:    retq # encoding: [0xc3]
2901  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2902  ret <8 x i16> %res
2903}
2904
2905
2906define <8 x i16>@test_int_x86_avx512_maskz_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2907; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_128:
2908; X86:       # %bb.0:
2909; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2910; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2911; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
2912; X86-NEXT:    retl # encoding: [0xc3]
2913;
2914; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_128:
2915; X64:       # %bb.0:
2916; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2917; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
2918; X64-NEXT:    retq # encoding: [0xc3]
2919  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
2920  ret <8 x i16> %res
2921}
2922
2923declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
2924
2925define <16 x i16>@test_int_x86_avx512_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
2926; CHECK-LABEL: test_int_x86_avx512_psrl_w_256:
2927; CHECK:       # %bb.0:
2928; CHECK-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
2929; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2930  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
2931  ret <16 x i16> %res
2932}
2933
2934define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2935; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256:
2936; X86:       # %bb.0:
2937; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2938; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
2939; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2940; X86-NEXT:    retl # encoding: [0xc3]
2941;
2942; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256:
2943; X64:       # %bb.0:
2944; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2945; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
2946; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2947; X64-NEXT:    retq # encoding: [0xc3]
2948  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
2949  ret <16 x i16> %res
2950}
2951
2952define <16 x i16>@test_int_x86_avx512_maskz_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
2953; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_256:
2954; X86:       # %bb.0:
2955; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2956; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
2957; X86-NEXT:    retl # encoding: [0xc3]
2958;
2959; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_256:
2960; X64:       # %bb.0:
2961; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2962; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
2963; X64-NEXT:    retq # encoding: [0xc3]
2964  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
2965  ret <16 x i16> %res
2966}
2967
2968declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2969
2970define <8 x i16>@test_int_x86_avx512_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2971; CHECK-LABEL: test_int_x86_avx512_psra_w_128:
2972; CHECK:       # %bb.0:
2973; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
2974; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
2975  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2976  ret <8 x i16> %res
2977}
2978
2979define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2980; X86-LABEL: test_int_x86_avx512_mask_psra_w_128:
2981; X86:       # %bb.0:
2982; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2983; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2984; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
2985; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2986; X86-NEXT:    retl # encoding: [0xc3]
2987;
2988; X64-LABEL: test_int_x86_avx512_mask_psra_w_128:
2989; X64:       # %bb.0:
2990; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2991; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
2992; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2993; X64-NEXT:    retq # encoding: [0xc3]
2994  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2995  ret <8 x i16> %res
2996}
2997
2998define <8 x i16>@test_int_x86_avx512_maskz_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2999; X86-LABEL: test_int_x86_avx512_maskz_psra_w_128:
3000; X86:       # %bb.0:
3001; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3002; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3003; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
3004; X86-NEXT:    retl # encoding: [0xc3]
3005;
3006; X64-LABEL: test_int_x86_avx512_maskz_psra_w_128:
3007; X64:       # %bb.0:
3008; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3009; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
3010; X64-NEXT:    retq # encoding: [0xc3]
3011  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
3012  ret <8 x i16> %res
3013}
3014
3015declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
3016
3017define <16 x i16>@test_int_x86_avx512_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
3018; CHECK-LABEL: test_int_x86_avx512_psra_w_256:
3019; CHECK:       # %bb.0:
3020; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
3021; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3022  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
3023  ret <16 x i16> %res
3024}
3025
3026define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3027; X86-LABEL: test_int_x86_avx512_mask_psra_w_256:
3028; X86:       # %bb.0:
3029; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3030; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
3031; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3032; X86-NEXT:    retl # encoding: [0xc3]
3033;
3034; X64-LABEL: test_int_x86_avx512_mask_psra_w_256:
3035; X64:       # %bb.0:
3036; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3037; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
3038; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3039; X64-NEXT:    retq # encoding: [0xc3]
3040  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
3041  ret <16 x i16> %res
3042}
3043
3044define <16 x i16>@test_int_x86_avx512_maskz_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
3045; X86-LABEL: test_int_x86_avx512_maskz_psra_w_256:
3046; X86:       # %bb.0:
3047; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3048; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
3049; X86-NEXT:    retl # encoding: [0xc3]
3050;
3051; X64-LABEL: test_int_x86_avx512_maskz_psra_w_256:
3052; X64:       # %bb.0:
3053; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3054; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
3055; X64-NEXT:    retq # encoding: [0xc3]
3056  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
3057  ret <16 x i16> %res
3058}
3059
3060declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
3061
3062define <8 x i16>@test_int_x86_avx512_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
3063; CHECK-LABEL: test_int_x86_avx512_psll_w_128:
3064; CHECK:       # %bb.0:
3065; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
3066; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3067  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
3068  ret <8 x i16> %res
3069}
3070
3071define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
3072; X86-LABEL: test_int_x86_avx512_mask_psll_w_128:
3073; X86:       # %bb.0:
3074; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3075; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3076; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
3077; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3078; X86-NEXT:    retl # encoding: [0xc3]
3079;
3080; X64-LABEL: test_int_x86_avx512_mask_psll_w_128:
3081; X64:       # %bb.0:
3082; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3083; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
3084; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3085; X64-NEXT:    retq # encoding: [0xc3]
3086  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
3087  ret <8 x i16> %res
3088}
3089
3090define <8 x i16>@test_int_x86_avx512_maskz_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
3091; X86-LABEL: test_int_x86_avx512_maskz_psll_w_128:
3092; X86:       # %bb.0:
3093; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3094; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3095; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
3096; X86-NEXT:    retl # encoding: [0xc3]
3097;
3098; X64-LABEL: test_int_x86_avx512_maskz_psll_w_128:
3099; X64:       # %bb.0:
3100; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3101; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
3102; X64-NEXT:    retq # encoding: [0xc3]
3103  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
3104  ret <8 x i16> %res
3105}
3106
3107declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
3108
3109define <16 x i16>@test_int_x86_avx512_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2) {
3110; CHECK-LABEL: test_int_x86_avx512_psll_w_256:
3111; CHECK:       # %bb.0:
3112; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
3113; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3114  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
3115  ret <16 x i16> %res
3116}
3117
3118define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
3119; X86-LABEL: test_int_x86_avx512_mask_psll_w_256:
3120; X86:       # %bb.0:
3121; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3122; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
3123; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3124; X86-NEXT:    retl # encoding: [0xc3]
3125;
3126; X64-LABEL: test_int_x86_avx512_mask_psll_w_256:
3127; X64:       # %bb.0:
3128; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3129; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
3130; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3131; X64-NEXT:    retq # encoding: [0xc3]
3132  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
3133  ret <16 x i16> %res
3134}
3135
3136define <16 x i16>@test_int_x86_avx512_maskz_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, i16 %x3) {
3137; X86-LABEL: test_int_x86_avx512_maskz_psll_w_256:
3138; X86:       # %bb.0:
3139; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3140; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
3141; X86-NEXT:    retl # encoding: [0xc3]
3142;
3143; X64-LABEL: test_int_x86_avx512_maskz_psll_w_256:
3144; X64:       # %bb.0:
3145; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3146; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
3147; X64-NEXT:    retq # encoding: [0xc3]
3148  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
3149  ret <16 x i16> %res
3150}
3151
3152declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3153
3154define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3155; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
3156; X86:       # %bb.0:
3157; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3158; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3159; X86-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
3160; X86-NEXT:    vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04]
3161; X86-NEXT:    vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05]
3162; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
3163; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
3164; X86-NEXT:    retl # encoding: [0xc3]
3165;
3166; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
3167; X64:       # %bb.0:
3168; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3169; X64-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
3170; X64-NEXT:    vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04]
3171; X64-NEXT:    vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05]
3172; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
3173; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
3174; X64-NEXT:    retq # encoding: [0xc3]
3175  %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3176  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1)
3177  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3)
3178  %res3 = add <8 x i16> %res, %res1
3179  %res4 = add <8 x i16> %res2, %res3
3180  ret <8 x i16> %res4
3181}
3182
3183declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3184
3185define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3186; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
3187; X86:       # %bb.0:
3188; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3189; X86-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
3190; X86-NEXT:    vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04]
3191; X86-NEXT:    vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05]
3192; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
3193; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
3194; X86-NEXT:    retl # encoding: [0xc3]
3195;
3196; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
3197; X64:       # %bb.0:
3198; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3199; X64-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
3200; X64-NEXT:    vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04]
3201; X64-NEXT:    vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05]
3202; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
3203; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
3204; X64-NEXT:    retq # encoding: [0xc3]
3205  %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3206  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1)
3207  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3)
3208  %res3 = add <16 x i16> %res, %res1
3209  %res4 = add <16 x i16> %res3, %res2
3210  ret <16 x i16> %res4
3211}
3212
3213declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3214
3215define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3216; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128:
3217; X86:       # %bb.0:
3218; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3219; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3220; X86-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
3221; X86-NEXT:    vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04]
3222; X86-NEXT:    vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05]
3223; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
3224; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
3225; X86-NEXT:    retl # encoding: [0xc3]
3226;
3227; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128:
3228; X64:       # %bb.0:
3229; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3230; X64-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
3231; X64-NEXT:    vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04]
3232; X64-NEXT:    vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05]
3233; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
3234; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
3235; X64-NEXT:    retq # encoding: [0xc3]
3236  %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3237  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
3238  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
3239  %res3 = add <8 x i16> %res, %res1
3240  %res4 = add <8 x i16> %res3, %res2
3241  ret <8 x i16> %res4
3242}
3243
3244declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3245
3246define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3247; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256:
3248; X86:       # %bb.0:
3249; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3250; X86-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
3251; X86-NEXT:    vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04]
3252; X86-NEXT:    vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05]
3253; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
3254; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
3255; X86-NEXT:    retl # encoding: [0xc3]
3256;
3257; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256:
3258; X64:       # %bb.0:
3259; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3260; X64-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
3261; X64-NEXT:    vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04]
3262; X64-NEXT:    vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05]
3263; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
3264; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
3265; X64-NEXT:    retq # encoding: [0xc3]
3266  %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3267  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
3268  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
3269  %res3 = add <16 x i16> %res, %res1
3270  %res4 = add <16 x i16> %res3, %res2
3271  ret <16 x i16> %res4
3272}
3273
3274declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8)
3275
3276define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
3277; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128:
3278; X86:       # %bb.0:
3279; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3280; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3281; X86-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
3282; X86-NEXT:    vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04]
3283; X86-NEXT:    vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05]
3284; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
3285; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
3286; X86-NEXT:    retl # encoding: [0xc3]
3287;
3288; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128:
3289; X64:       # %bb.0:
3290; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3291; X64-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
3292; X64-NEXT:    vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04]
3293; X64-NEXT:    vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05]
3294; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
3295; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
3296; X64-NEXT:    retq # encoding: [0xc3]
3297  %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
3298  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3)
3299  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1)
3300  %res3 = add <8 x i16> %res, %res1
3301  %res4 = add <8 x i16> %res3, %res2
3302  ret <8 x i16> %res4
3303}
3304
3305declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16)
3306
3307define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
3308; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256:
3309; X86:       # %bb.0:
3310; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3311; X86-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
3312; X86-NEXT:    vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04]
3313; X86-NEXT:    vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05]
3314; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
3315; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
3316; X86-NEXT:    retl # encoding: [0xc3]
3317;
3318; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256:
3319; X64:       # %bb.0:
3320; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3321; X64-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
3322; X64-NEXT:    vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04]
3323; X64-NEXT:    vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05]
3324; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
3325; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
3326; X64-NEXT:    retq # encoding: [0xc3]
3327  %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
3328  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3)
3329  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1)
3330  %res3 = add <16 x i16> %res, %res1
3331  %res4 = add <16 x i16> %res3, %res2
3332  ret <16 x i16> %res4
3333}
3334
3335declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
3336
3337define <16 x i8>@test_int_x86_avx512_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
3338; CHECK-LABEL: test_int_x86_avx512_pshuf_b_128:
3339; CHECK:       # %bb.0:
3340; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
3341; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3342  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
3343  ret <16 x i8> %res
3344}
3345
3346define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
3347; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
3348; X86:       # %bb.0:
3349; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3350; X86-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
3351; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3352; X86-NEXT:    retl # encoding: [0xc3]
3353;
3354; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
3355; X64:       # %bb.0:
3356; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3357; X64-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
3358; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3359; X64-NEXT:    retq # encoding: [0xc3]
3360  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
3361  ret <16 x i8> %res
3362}
3363
3364declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
3365
3366define <32 x i8>@test_int_x86_avx512_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
3367; CHECK-LABEL: test_int_x86_avx512_pshuf_b_256:
3368; CHECK:       # %bb.0:
3369; CHECK-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
3370; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3371  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
3372  ret <32 x i8> %res
3373}
3374
3375define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
3376; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
3377; X86:       # %bb.0:
3378; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3379; X86-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
3380; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3381; X86-NEXT:    retl # encoding: [0xc3]
3382;
3383; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
3384; X64:       # %bb.0:
3385; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3386; X64-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
3387; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3388; X64-NEXT:    retq # encoding: [0xc3]
3389  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
3390  ret <32 x i8> %res
3391}
3392
3393declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)
3394
3395define <8 x i16>@test_int_x86_avx512_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
3396; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_128:
3397; CHECK:       # %bb.0:
3398; CHECK-NEXT:    vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
3399; CHECK-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3400; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3401  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
3402  ret <8 x i16> %res
3403}
3404
3405define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
3406; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
3407; X86:       # %bb.0:
3408; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3409; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3410; X86-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
3411; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3412; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3413; X86-NEXT:    retl # encoding: [0xc3]
3414;
3415; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
3416; X64:       # %bb.0:
3417; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3418; X64-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
3419; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3420; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3421; X64-NEXT:    retq # encoding: [0xc3]
3422  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
3423  ret <8 x i16> %res
3424}
3425
3426define <8 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_128(<16 x i8> %x0, i8 %x2) {
3427; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
3428; X86:       # %bb.0:
3429; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3430; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3431; X86-NEXT:    vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
3432; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3433; X86-NEXT:    retl # encoding: [0xc3]
3434;
3435; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_128:
3436; X64:       # %bb.0:
3437; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3438; X64-NEXT:    vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
3439; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3440; X64-NEXT:    retq # encoding: [0xc3]
3441  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
3442  ret <8 x i16> %res
3443}
3444
3445declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)
3446
3447define <16 x i16>@test_int_x86_avx512_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
3448; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_256:
3449; CHECK:       # %bb.0:
3450; CHECK-NEXT:    vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
3451; CHECK-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3452; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3453  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
3454  ret <16 x i16> %res
3455}
3456
3457define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
3458; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
3459; X86:       # %bb.0:
3460; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3461; X86-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
3462; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3463; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3464; X86-NEXT:    retl # encoding: [0xc3]
3465;
3466; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
3467; X64:       # %bb.0:
3468; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3469; X64-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
3470; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3471; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3472; X64-NEXT:    retq # encoding: [0xc3]
3473  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
3474  ret <16 x i16> %res
3475}
3476
3477define <16 x i16>@test_int_x86_avx512_maskz_pmovzxb_w_256(<16 x i8> %x0, i16 %x2) {
3478; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
3479; X86:       # %bb.0:
3480; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3481; X86-NEXT:    vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
3482; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3483; X86-NEXT:    retl # encoding: [0xc3]
3484;
3485; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_256:
3486; X64:       # %bb.0:
3487; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3488; X64-NEXT:    vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
3489; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
3490; X64-NEXT:    retq # encoding: [0xc3]
3491  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
3492  ret <16 x i16> %res
3493}
3494
3495declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)
3496
3497define <8 x i16>@test_int_x86_avx512_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1) {
3498; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_128:
3499; CHECK:       # %bb.0:
3500; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
3501; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3502  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
3503  ret <8 x i16> %res
3504}
3505
3506define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
3507; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
3508; X86:       # %bb.0:
3509; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3510; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3511; X86-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
3512; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3513; X86-NEXT:    retl # encoding: [0xc3]
3514;
3515; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
3516; X64:       # %bb.0:
3517; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3518; X64-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
3519; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3520; X64-NEXT:    retq # encoding: [0xc3]
3521  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
3522  ret <8 x i16> %res
3523}
3524
3525define <8 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_128(<16 x i8> %x0, i8 %x2) {
3526; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
3527; X86:       # %bb.0:
3528; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3529; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3530; X86-NEXT:    vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
3531; X86-NEXT:    retl # encoding: [0xc3]
3532;
3533; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_128:
3534; X64:       # %bb.0:
3535; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3536; X64-NEXT:    vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
3537; X64-NEXT:    retq # encoding: [0xc3]
3538  %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
3539  ret <8 x i16> %res
3540}
3541
3542declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)
3543
3544define <16 x i16>@test_int_x86_avx512_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1) {
3545; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_256:
3546; CHECK:       # %bb.0:
3547; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xc0]
3548; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3549  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
3550  ret <16 x i16> %res
3551}
3552
3553define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
3554; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
3555; X86:       # %bb.0:
3556; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3557; X86-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
3558; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3559; X86-NEXT:    retl # encoding: [0xc3]
3560;
3561; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
3562; X64:       # %bb.0:
3563; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3564; X64-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
3565; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3566; X64-NEXT:    retq # encoding: [0xc3]
3567  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
3568  ret <16 x i16> %res
3569}
3570
3571define <16 x i16>@test_int_x86_avx512_maskz_pmovsxb_w_256(<16 x i8> %x0, i16 %x2) {
3572; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
3573; X86:       # %bb.0:
3574; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3575; X86-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
3576; X86-NEXT:    retl # encoding: [0xc3]
3577;
3578; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_256:
3579; X64:       # %bb.0:
3580; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3581; X64-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
3582; X64-NEXT:    retq # encoding: [0xc3]
3583  %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
3584  ret <16 x i16> %res
3585}
3586
3587declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)
3588
3589define <2 x i64>@test_int_x86_avx512_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1) {
3590; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_128:
3591; CHECK:       # %bb.0:
3592; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
3593; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3594  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
3595  ret <2 x i64> %res
3596}
3597
3598define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
3599; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
3600; X86:       # %bb.0:
3601; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3602; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3603; X86-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
3604; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3605; X86-NEXT:    retl # encoding: [0xc3]
3606;
3607; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
3608; X64:       # %bb.0:
3609; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3610; X64-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
3611; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3612; X64-NEXT:    retq # encoding: [0xc3]
3613  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
3614  ret <2 x i64> %res
3615}
3616
3617define <2 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_128(<4 x i32> %x0, i8 %x2) {
3618; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
3619; X86:       # %bb.0:
3620; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3621; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3622; X86-NEXT:    vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
3623; X86-NEXT:    retl # encoding: [0xc3]
3624;
3625; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_128:
3626; X64:       # %bb.0:
3627; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3628; X64-NEXT:    vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
3629; X64-NEXT:    retq # encoding: [0xc3]
3630  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
3631  ret <2 x i64> %res
3632}
3633
3634declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)
3635
3636define <4 x i64>@test_int_x86_avx512_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1) {
3637; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_256:
3638; CHECK:       # %bb.0:
3639; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xc0]
3640; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3641  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
3642  ret <4 x i64> %res
3643}
3644
3645define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
3646; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
3647; X86:       # %bb.0:
3648; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3649; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3650; X86-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
3651; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3652; X86-NEXT:    retl # encoding: [0xc3]
3653;
3654; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
3655; X64:       # %bb.0:
3656; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3657; X64-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
3658; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3659; X64-NEXT:    retq # encoding: [0xc3]
3660  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
3661  ret <4 x i64> %res
3662}
3663
3664define <4 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_256(<4 x i32> %x0, i8 %x2) {
3665; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
3666; X86:       # %bb.0:
3667; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3668; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3669; X86-NEXT:    vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
3670; X86-NEXT:    retl # encoding: [0xc3]
3671;
3672; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_256:
3673; X64:       # %bb.0:
3674; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3675; X64-NEXT:    vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
3676; X64-NEXT:    retq # encoding: [0xc3]
3677  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
3678  ret <4 x i64> %res
3679}
3680
3681declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)
3682
3683define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
3684; X86-LABEL: test_int_x86_avx512_cvtmask2b_128:
3685; X86:       # %bb.0:
3686; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
3687; X86-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
3688; X86-NEXT:    retl # encoding: [0xc3]
3689;
3690; X64-LABEL: test_int_x86_avx512_cvtmask2b_128:
3691; X64:       # %bb.0:
3692; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3693; X64-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
3694; X64-NEXT:    retq # encoding: [0xc3]
3695  %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
3696  ret <16 x i8> %res
3697}
3698
3699declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)
3700
3701define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
3702; X86-LABEL: test_int_x86_avx512_cvtmask2b_256:
3703; X86:       # %bb.0:
3704; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
3705; X86-NEXT:    vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
3706; X86-NEXT:    retl # encoding: [0xc3]
3707;
3708; X64-LABEL: test_int_x86_avx512_cvtmask2b_256:
3709; X64:       # %bb.0:
3710; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3711; X64-NEXT:    vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
3712; X64-NEXT:    retq # encoding: [0xc3]
3713  %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
3714  ret <32 x i8> %res
3715}
3716
3717declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)
3718
3719define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
3720; X86-LABEL: test_int_x86_avx512_cvtmask2w_128:
3721; X86:       # %bb.0:
3722; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3723; X86-NEXT:    kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0]
3724; X86-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
3725; X86-NEXT:    retl # encoding: [0xc3]
3726;
3727; X64-LABEL: test_int_x86_avx512_cvtmask2w_128:
3728; X64:       # %bb.0:
3729; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3730; X64-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
3731; X64-NEXT:    retq # encoding: [0xc3]
3732  %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
3733  ret <8 x i16> %res
3734}
3735
3736declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)
3737
3738define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
3739; X86-LABEL: test_int_x86_avx512_cvtmask2w_256:
3740; X86:       # %bb.0:
3741; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
3742; X86-NEXT:    vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
3743; X86-NEXT:    retl # encoding: [0xc3]
3744;
3745; X64-LABEL: test_int_x86_avx512_cvtmask2w_256:
3746; X64:       # %bb.0:
3747; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
3748; X64-NEXT:    vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
3749; X64-NEXT:    retq # encoding: [0xc3]
3750  %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
3751  ret <16 x i16> %res
3752}
3753define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3754; CHECK-LABEL: test_mask_packs_epi32_rr_128:
3755; CHECK:       # %bb.0:
3756; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
3757; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3758  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3759  ret <8 x i16> %res
3760}
3761
3762define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
3763; X86-LABEL: test_mask_packs_epi32_rrk_128:
3764; X86:       # %bb.0:
3765; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3766; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3767; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
3768; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3769; X86-NEXT:    retl # encoding: [0xc3]
3770;
3771; X64-LABEL: test_mask_packs_epi32_rrk_128:
3772; X64:       # %bb.0:
3773; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3774; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
3775; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3776; X64-NEXT:    retq # encoding: [0xc3]
3777  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3778  ret <8 x i16> %res
3779}
3780
3781define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3782; X86-LABEL: test_mask_packs_epi32_rrkz_128:
3783; X86:       # %bb.0:
3784; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3785; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
3786; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
3787; X86-NEXT:    retl # encoding: [0xc3]
3788;
3789; X64-LABEL: test_mask_packs_epi32_rrkz_128:
3790; X64:       # %bb.0:
3791; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3792; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
3793; X64-NEXT:    retq # encoding: [0xc3]
3794  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3795  ret <8 x i16> %res
3796}
3797
3798define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
3799; X86-LABEL: test_mask_packs_epi32_rm_128:
3800; X86:       # %bb.0:
3801; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3802; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
3803; X86-NEXT:    retl # encoding: [0xc3]
3804;
3805; X64-LABEL: test_mask_packs_epi32_rm_128:
3806; X64:       # %bb.0:
3807; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
3808; X64-NEXT:    retq # encoding: [0xc3]
3809  %b = load <4 x i32>, <4 x i32>* %ptr_b
3810  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3811  ret <8 x i16> %res
3812}
3813
3814define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
3815; X86-LABEL: test_mask_packs_epi32_rmk_128:
3816; X86:       # %bb.0:
3817; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3818; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3819; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3820; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
3821; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3822; X86-NEXT:    retl # encoding: [0xc3]
3823;
3824; X64-LABEL: test_mask_packs_epi32_rmk_128:
3825; X64:       # %bb.0:
3826; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3827; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
3828; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3829; X64-NEXT:    retq # encoding: [0xc3]
3830  %b = load <4 x i32>, <4 x i32>* %ptr_b
3831  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3832  ret <8 x i16> %res
3833}
3834
3835define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
3836; X86-LABEL: test_mask_packs_epi32_rmkz_128:
3837; X86:       # %bb.0:
3838; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3839; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3840; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3841; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
3842; X86-NEXT:    retl # encoding: [0xc3]
3843;
3844; X64-LABEL: test_mask_packs_epi32_rmkz_128:
3845; X64:       # %bb.0:
3846; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3847; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
3848; X64-NEXT:    retq # encoding: [0xc3]
3849  %b = load <4 x i32>, <4 x i32>* %ptr_b
3850  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3851  ret <8 x i16> %res
3852}
3853
3854define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
3855; X86-LABEL: test_mask_packs_epi32_rmb_128:
3856; X86:       # %bb.0:
3857; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3858; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
3859; X86-NEXT:    retl # encoding: [0xc3]
3860;
3861; X64-LABEL: test_mask_packs_epi32_rmb_128:
3862; X64:       # %bb.0:
3863; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
3864; X64-NEXT:    retq # encoding: [0xc3]
3865  %q = load i32, i32* %ptr_b
3866  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3867  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3868  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
3869  ret <8 x i16> %res
3870}
3871
3872define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
3873; X86-LABEL: test_mask_packs_epi32_rmbk_128:
3874; X86:       # %bb.0:
3875; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3876; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3877; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3878; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
3879; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3880; X86-NEXT:    retl # encoding: [0xc3]
3881;
3882; X64-LABEL: test_mask_packs_epi32_rmbk_128:
3883; X64:       # %bb.0:
3884; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3885; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
3886; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3887; X64-NEXT:    retq # encoding: [0xc3]
3888  %q = load i32, i32* %ptr_b
3889  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3890  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3891  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
3892  ret <8 x i16> %res
3893}
3894
3895define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
3896; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
3897; X86:       # %bb.0:
3898; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3899; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3900; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
3901; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
3902; X86-NEXT:    retl # encoding: [0xc3]
3903;
3904; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
3905; X64:       # %bb.0:
3906; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3907; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
3908; X64-NEXT:    retq # encoding: [0xc3]
3909  %q = load i32, i32* %ptr_b
3910  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3911  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3912  %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
3913  ret <8 x i16> %res
3914}
3915
3916declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
3917
3918define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3919; CHECK-LABEL: test_mask_packs_epi32_rr_256:
3920; CHECK:       # %bb.0:
3921; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
3922; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3923  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
3924  ret <16 x i16> %res
3925}
3926
3927define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
3928; X86-LABEL: test_mask_packs_epi32_rrk_256:
3929; X86:       # %bb.0:
3930; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3931; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
3932; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3933; X86-NEXT:    retl # encoding: [0xc3]
3934;
3935; X64-LABEL: test_mask_packs_epi32_rrk_256:
3936; X64:       # %bb.0:
3937; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3938; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
3939; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3940; X64-NEXT:    retq # encoding: [0xc3]
3941  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
3942  ret <16 x i16> %res
3943}
3944
3945define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
3946; X86-LABEL: test_mask_packs_epi32_rrkz_256:
3947; X86:       # %bb.0:
3948; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3949; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
3950; X86-NEXT:    retl # encoding: [0xc3]
3951;
3952; X64-LABEL: test_mask_packs_epi32_rrkz_256:
3953; X64:       # %bb.0:
3954; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3955; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
3956; X64-NEXT:    retq # encoding: [0xc3]
3957  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
3958  ret <16 x i16> %res
3959}
3960
3961define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3962; X86-LABEL: test_mask_packs_epi32_rm_256:
3963; X86:       # %bb.0:
3964; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3965; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
3966; X86-NEXT:    retl # encoding: [0xc3]
3967;
3968; X64-LABEL: test_mask_packs_epi32_rm_256:
3969; X64:       # %bb.0:
3970; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
3971; X64-NEXT:    retq # encoding: [0xc3]
3972  %b = load <8 x i32>, <8 x i32>* %ptr_b
3973  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
3974  ret <16 x i16> %res
3975}
3976
3977define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
3978; X86-LABEL: test_mask_packs_epi32_rmk_256:
3979; X86:       # %bb.0:
3980; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3981; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
3982; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
3983; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3984; X86-NEXT:    retl # encoding: [0xc3]
3985;
3986; X64-LABEL: test_mask_packs_epi32_rmk_256:
3987; X64:       # %bb.0:
3988; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3989; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
3990; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3991; X64-NEXT:    retq # encoding: [0xc3]
3992  %b = load <8 x i32>, <8 x i32>* %ptr_b
3993  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
3994  ret <16 x i16> %res
3995}
3996
3997define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
3998; X86-LABEL: test_mask_packs_epi32_rmkz_256:
3999; X86:       # %bb.0:
4000; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4001; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4002; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
4003; X86-NEXT:    retl # encoding: [0xc3]
4004;
4005; X64-LABEL: test_mask_packs_epi32_rmkz_256:
4006; X64:       # %bb.0:
4007; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4008; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
4009; X64-NEXT:    retq # encoding: [0xc3]
4010  %b = load <8 x i32>, <8 x i32>* %ptr_b
4011  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4012  ret <16 x i16> %res
4013}
4014
4015define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
4016; X86-LABEL: test_mask_packs_epi32_rmb_256:
4017; X86:       # %bb.0:
4018; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4019; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
4020; X86-NEXT:    retl # encoding: [0xc3]
4021;
4022; X64-LABEL: test_mask_packs_epi32_rmb_256:
4023; X64:       # %bb.0:
4024; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
4025; X64-NEXT:    retq # encoding: [0xc3]
4026  %q = load i32, i32* %ptr_b
4027  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4028  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4029  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4030  ret <16 x i16> %res
4031}
4032
4033define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
4034; X86-LABEL: test_mask_packs_epi32_rmbk_256:
4035; X86:       # %bb.0:
4036; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4037; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4038; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
4039; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4040; X86-NEXT:    retl # encoding: [0xc3]
4041;
4042; X64-LABEL: test_mask_packs_epi32_rmbk_256:
4043; X64:       # %bb.0:
4044; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4045; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
4046; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4047; X64-NEXT:    retq # encoding: [0xc3]
4048  %q = load i32, i32* %ptr_b
4049  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4050  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4051  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4052  ret <16 x i16> %res
4053}
4054
4055define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
4056; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
4057; X86:       # %bb.0:
4058; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4059; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4060; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
4061; X86-NEXT:    retl # encoding: [0xc3]
4062;
4063; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
4064; X64:       # %bb.0:
4065; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4066; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
4067; X64-NEXT:    retq # encoding: [0xc3]
4068  %q = load i32, i32* %ptr_b
4069  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4070  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4071  %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4072  ret <16 x i16> %res
4073}
4074
4075declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
4076
4077define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
4078; CHECK-LABEL: test_mask_packs_epi16_rr_128:
4079; CHECK:       # %bb.0:
4080; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
4081; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4082  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4083  ret <16 x i8> %res
4084}
4085
4086define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
4087; X86-LABEL: test_mask_packs_epi16_rrk_128:
4088; X86:       # %bb.0:
4089; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4090; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
4091; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4092; X86-NEXT:    retl # encoding: [0xc3]
4093;
4094; X64-LABEL: test_mask_packs_epi16_rrk_128:
4095; X64:       # %bb.0:
4096; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4097; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
4098; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4099; X64-NEXT:    retq # encoding: [0xc3]
4100  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4101  ret <16 x i8> %res
4102}
4103
4104define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
4105; X86-LABEL: test_mask_packs_epi16_rrkz_128:
4106; X86:       # %bb.0:
4107; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4108; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
4109; X86-NEXT:    retl # encoding: [0xc3]
4110;
4111; X64-LABEL: test_mask_packs_epi16_rrkz_128:
4112; X64:       # %bb.0:
4113; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4114; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
4115; X64-NEXT:    retq # encoding: [0xc3]
4116  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4117  ret <16 x i8> %res
4118}
4119
4120define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
4121; X86-LABEL: test_mask_packs_epi16_rm_128:
4122; X86:       # %bb.0:
4123; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4124; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
4125; X86-NEXT:    retl # encoding: [0xc3]
4126;
4127; X64-LABEL: test_mask_packs_epi16_rm_128:
4128; X64:       # %bb.0:
4129; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
4130; X64-NEXT:    retq # encoding: [0xc3]
4131  %b = load <8 x i16>, <8 x i16>* %ptr_b
4132  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4133  ret <16 x i8> %res
4134}
4135
4136define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
4137; X86-LABEL: test_mask_packs_epi16_rmk_128:
4138; X86:       # %bb.0:
4139; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4140; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4141; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
4142; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4143; X86-NEXT:    retl # encoding: [0xc3]
4144;
4145; X64-LABEL: test_mask_packs_epi16_rmk_128:
4146; X64:       # %bb.0:
4147; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4148; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
4149; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4150; X64-NEXT:    retq # encoding: [0xc3]
4151  %b = load <8 x i16>, <8 x i16>* %ptr_b
4152  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4153  ret <16 x i8> %res
4154}
4155
4156define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
4157; X86-LABEL: test_mask_packs_epi16_rmkz_128:
4158; X86:       # %bb.0:
4159; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4160; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4161; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
4162; X86-NEXT:    retl # encoding: [0xc3]
4163;
4164; X64-LABEL: test_mask_packs_epi16_rmkz_128:
4165; X64:       # %bb.0:
4166; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4167; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
4168; X64-NEXT:    retq # encoding: [0xc3]
4169  %b = load <8 x i16>, <8 x i16>* %ptr_b
4170  %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4171  ret <16 x i8> %res
4172}
4173
4174declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
4175
4176define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
4177; CHECK-LABEL: test_mask_packs_epi16_rr_256:
4178; CHECK:       # %bb.0:
4179; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
4180; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4181  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4182  ret <32 x i8> %res
4183}
4184
4185define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
4186; X86-LABEL: test_mask_packs_epi16_rrk_256:
4187; X86:       # %bb.0:
4188; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4189; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
4190; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4191; X86-NEXT:    retl # encoding: [0xc3]
4192;
4193; X64-LABEL: test_mask_packs_epi16_rrk_256:
4194; X64:       # %bb.0:
4195; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4196; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
4197; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4198; X64-NEXT:    retq # encoding: [0xc3]
4199  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4200  ret <32 x i8> %res
4201}
4202
4203define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
4204; X86-LABEL: test_mask_packs_epi16_rrkz_256:
4205; X86:       # %bb.0:
4206; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4207; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
4208; X86-NEXT:    retl # encoding: [0xc3]
4209;
4210; X64-LABEL: test_mask_packs_epi16_rrkz_256:
4211; X64:       # %bb.0:
4212; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4213; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
4214; X64-NEXT:    retq # encoding: [0xc3]
4215  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4216  ret <32 x i8> %res
4217}
4218
4219define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
4220; X86-LABEL: test_mask_packs_epi16_rm_256:
4221; X86:       # %bb.0:
4222; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4223; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
4224; X86-NEXT:    retl # encoding: [0xc3]
4225;
4226; X64-LABEL: test_mask_packs_epi16_rm_256:
4227; X64:       # %bb.0:
4228; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
4229; X64-NEXT:    retq # encoding: [0xc3]
4230  %b = load <16 x i16>, <16 x i16>* %ptr_b
4231  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4232  ret <32 x i8> %res
4233}
4234
4235define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
4236; X86-LABEL: test_mask_packs_epi16_rmk_256:
4237; X86:       # %bb.0:
4238; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4239; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4240; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
4241; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4242; X86-NEXT:    retl # encoding: [0xc3]
4243;
4244; X64-LABEL: test_mask_packs_epi16_rmk_256:
4245; X64:       # %bb.0:
4246; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4247; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
4248; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4249; X64-NEXT:    retq # encoding: [0xc3]
4250  %b = load <16 x i16>, <16 x i16>* %ptr_b
4251  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4252  ret <32 x i8> %res
4253}
4254
4255define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
4256; X86-LABEL: test_mask_packs_epi16_rmkz_256:
4257; X86:       # %bb.0:
4258; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4259; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4260; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
4261; X86-NEXT:    retl # encoding: [0xc3]
4262;
4263; X64-LABEL: test_mask_packs_epi16_rmkz_256:
4264; X64:       # %bb.0:
4265; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4266; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
4267; X64-NEXT:    retq # encoding: [0xc3]
4268  %b = load <16 x i16>, <16 x i16>* %ptr_b
4269  %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4270  ret <32 x i8> %res
4271}
4272
4273declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
4274
4275
4276define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4277; CHECK-LABEL: test_mask_packus_epi32_rr_128:
4278; CHECK:       # %bb.0:
4279; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
4280; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4281  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4282  ret <8 x i16> %res
4283}
4284
4285define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
4286; X86-LABEL: test_mask_packus_epi32_rrk_128:
4287; X86:       # %bb.0:
4288; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4289; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4290; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
4291; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4292; X86-NEXT:    retl # encoding: [0xc3]
4293;
4294; X64-LABEL: test_mask_packus_epi32_rrk_128:
4295; X64:       # %bb.0:
4296; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4297; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
4298; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4299; X64-NEXT:    retq # encoding: [0xc3]
4300  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4301  ret <8 x i16> %res
4302}
4303
4304define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4305; X86-LABEL: test_mask_packus_epi32_rrkz_128:
4306; X86:       # %bb.0:
4307; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4308; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4309; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
4310; X86-NEXT:    retl # encoding: [0xc3]
4311;
4312; X64-LABEL: test_mask_packus_epi32_rrkz_128:
4313; X64:       # %bb.0:
4314; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4315; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
4316; X64-NEXT:    retq # encoding: [0xc3]
4317  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4318  ret <8 x i16> %res
4319}
4320
4321define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4322; X86-LABEL: test_mask_packus_epi32_rm_128:
4323; X86:       # %bb.0:
4324; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4325; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
4326; X86-NEXT:    retl # encoding: [0xc3]
4327;
4328; X64-LABEL: test_mask_packus_epi32_rm_128:
4329; X64:       # %bb.0:
4330; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
4331; X64-NEXT:    retq # encoding: [0xc3]
4332  %b = load <4 x i32>, <4 x i32>* %ptr_b
4333  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4334  ret <8 x i16> %res
4335}
4336
4337define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
4338; X86-LABEL: test_mask_packus_epi32_rmk_128:
4339; X86:       # %bb.0:
4340; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4341; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4342; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4343; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
4344; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4345; X86-NEXT:    retl # encoding: [0xc3]
4346;
4347; X64-LABEL: test_mask_packus_epi32_rmk_128:
4348; X64:       # %bb.0:
4349; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4350; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
4351; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4352; X64-NEXT:    retq # encoding: [0xc3]
4353  %b = load <4 x i32>, <4 x i32>* %ptr_b
4354  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4355  ret <8 x i16> %res
4356}
4357
4358define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4359; X86-LABEL: test_mask_packus_epi32_rmkz_128:
4360; X86:       # %bb.0:
4361; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4362; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4363; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4364; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
4365; X86-NEXT:    retl # encoding: [0xc3]
4366;
4367; X64-LABEL: test_mask_packus_epi32_rmkz_128:
4368; X64:       # %bb.0:
4369; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4370; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
4371; X64-NEXT:    retq # encoding: [0xc3]
4372  %b = load <4 x i32>, <4 x i32>* %ptr_b
4373  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4374  ret <8 x i16> %res
4375}
4376
4377define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4378; X86-LABEL: test_mask_packus_epi32_rmb_128:
4379; X86:       # %bb.0:
4380; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4381; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
4382; X86-NEXT:    retl # encoding: [0xc3]
4383;
4384; X64-LABEL: test_mask_packus_epi32_rmb_128:
4385; X64:       # %bb.0:
4386; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
4387; X64-NEXT:    retq # encoding: [0xc3]
4388  %q = load i32, i32* %ptr_b
4389  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4390  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4391  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
4392  ret <8 x i16> %res
4393}
4394
4395define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
4396; X86-LABEL: test_mask_packus_epi32_rmbk_128:
4397; X86:       # %bb.0:
4398; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4399; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4400; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4401; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
4402; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4403; X86-NEXT:    retl # encoding: [0xc3]
4404;
4405; X64-LABEL: test_mask_packus_epi32_rmbk_128:
4406; X64:       # %bb.0:
4407; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4408; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
4409; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4410; X64-NEXT:    retq # encoding: [0xc3]
4411  %q = load i32, i32* %ptr_b
4412  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4413  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4414  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
4415  ret <8 x i16> %res
4416}
4417
4418define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4419; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
4420; X86:       # %bb.0:
4421; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4422; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4423; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
4424; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
4425; X86-NEXT:    retl # encoding: [0xc3]
4426;
4427; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
4428; X64:       # %bb.0:
4429; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4430; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
4431; X64-NEXT:    retq # encoding: [0xc3]
4432  %q = load i32, i32* %ptr_b
4433  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4434  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4435  %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
4436  ret <8 x i16> %res
4437}
4438
4439declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
4440
4441define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4442; CHECK-LABEL: test_mask_packus_epi32_rr_256:
4443; CHECK:       # %bb.0:
4444; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
4445; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4446  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4447  ret <16 x i16> %res
4448}
4449
4450define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
4451; X86-LABEL: test_mask_packus_epi32_rrk_256:
4452; X86:       # %bb.0:
4453; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4454; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
4455; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4456; X86-NEXT:    retl # encoding: [0xc3]
4457;
4458; X64-LABEL: test_mask_packus_epi32_rrk_256:
4459; X64:       # %bb.0:
4460; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4461; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
4462; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4463; X64-NEXT:    retq # encoding: [0xc3]
4464  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4465  ret <16 x i16> %res
4466}
4467
4468define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
4469; X86-LABEL: test_mask_packus_epi32_rrkz_256:
4470; X86:       # %bb.0:
4471; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4472; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
4473; X86-NEXT:    retl # encoding: [0xc3]
4474;
4475; X64-LABEL: test_mask_packus_epi32_rrkz_256:
4476; X64:       # %bb.0:
4477; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4478; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
4479; X64-NEXT:    retq # encoding: [0xc3]
4480  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4481  ret <16 x i16> %res
4482}
4483
4484define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
4485; X86-LABEL: test_mask_packus_epi32_rm_256:
4486; X86:       # %bb.0:
4487; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4488; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
4489; X86-NEXT:    retl # encoding: [0xc3]
4490;
4491; X64-LABEL: test_mask_packus_epi32_rm_256:
4492; X64:       # %bb.0:
4493; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
4494; X64-NEXT:    retq # encoding: [0xc3]
4495  %b = load <8 x i32>, <8 x i32>* %ptr_b
4496  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4497  ret <16 x i16> %res
4498}
4499
4500define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
4501; X86-LABEL: test_mask_packus_epi32_rmk_256:
4502; X86:       # %bb.0:
4503; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4504; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4505; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
4506; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4507; X86-NEXT:    retl # encoding: [0xc3]
4508;
4509; X64-LABEL: test_mask_packus_epi32_rmk_256:
4510; X64:       # %bb.0:
4511; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4512; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
4513; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4514; X64-NEXT:    retq # encoding: [0xc3]
4515  %b = load <8 x i32>, <8 x i32>* %ptr_b
4516  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4517  ret <16 x i16> %res
4518}
4519
4520define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
4521; X86-LABEL: test_mask_packus_epi32_rmkz_256:
4522; X86:       # %bb.0:
4523; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4524; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4525; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
4526; X86-NEXT:    retl # encoding: [0xc3]
4527;
4528; X64-LABEL: test_mask_packus_epi32_rmkz_256:
4529; X64:       # %bb.0:
4530; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4531; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
4532; X64-NEXT:    retq # encoding: [0xc3]
4533  %b = load <8 x i32>, <8 x i32>* %ptr_b
4534  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4535  ret <16 x i16> %res
4536}
4537
4538define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
4539; X86-LABEL: test_mask_packus_epi32_rmb_256:
4540; X86:       # %bb.0:
4541; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4542; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
4543; X86-NEXT:    retl # encoding: [0xc3]
4544;
4545; X64-LABEL: test_mask_packus_epi32_rmb_256:
4546; X64:       # %bb.0:
4547; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
4548; X64-NEXT:    retq # encoding: [0xc3]
4549  %q = load i32, i32* %ptr_b
4550  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4551  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4552  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
4553  ret <16 x i16> %res
4554}
4555
4556define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
4557; X86-LABEL: test_mask_packus_epi32_rmbk_256:
4558; X86:       # %bb.0:
4559; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4560; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4561; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
4562; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4563; X86-NEXT:    retl # encoding: [0xc3]
4564;
4565; X64-LABEL: test_mask_packus_epi32_rmbk_256:
4566; X64:       # %bb.0:
4567; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4568; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
4569; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4570; X64-NEXT:    retq # encoding: [0xc3]
4571  %q = load i32, i32* %ptr_b
4572  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4573  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4574  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
4575  ret <16 x i16> %res
4576}
4577
4578define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
4579; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
4580; X86:       # %bb.0:
4581; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4582; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4583; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
4584; X86-NEXT:    retl # encoding: [0xc3]
4585;
4586; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
4587; X64:       # %bb.0:
4588; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4589; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
4590; X64-NEXT:    retq # encoding: [0xc3]
4591  %q = load i32, i32* %ptr_b
4592  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4593  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4594  %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
4595  ret <16 x i16> %res
4596}
4597
4598declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
4599
4600define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
4601; CHECK-LABEL: test_mask_packus_epi16_rr_128:
4602; CHECK:       # %bb.0:
4603; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
4604; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4605  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4606  ret <16 x i8> %res
4607}
4608
4609define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
4610; X86-LABEL: test_mask_packus_epi16_rrk_128:
4611; X86:       # %bb.0:
4612; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4613; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
4614; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4615; X86-NEXT:    retl # encoding: [0xc3]
4616;
4617; X64-LABEL: test_mask_packus_epi16_rrk_128:
4618; X64:       # %bb.0:
4619; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4620; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
4621; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4622; X64-NEXT:    retq # encoding: [0xc3]
4623  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4624  ret <16 x i8> %res
4625}
4626
4627define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
4628; X86-LABEL: test_mask_packus_epi16_rrkz_128:
4629; X86:       # %bb.0:
4630; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
4631; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
4632; X86-NEXT:    retl # encoding: [0xc3]
4633;
4634; X64-LABEL: test_mask_packus_epi16_rrkz_128:
4635; X64:       # %bb.0:
4636; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4637; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
4638; X64-NEXT:    retq # encoding: [0xc3]
4639  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4640  ret <16 x i8> %res
4641}
4642
4643define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
4644; X86-LABEL: test_mask_packus_epi16_rm_128:
4645; X86:       # %bb.0:
4646; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4647; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
4648; X86-NEXT:    retl # encoding: [0xc3]
4649;
4650; X64-LABEL: test_mask_packus_epi16_rm_128:
4651; X64:       # %bb.0:
4652; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
4653; X64-NEXT:    retq # encoding: [0xc3]
4654  %b = load <8 x i16>, <8 x i16>* %ptr_b
4655  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
4656  ret <16 x i8> %res
4657}
4658
4659define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
4660; X86-LABEL: test_mask_packus_epi16_rmk_128:
4661; X86:       # %bb.0:
4662; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4663; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4664; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
4665; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4666; X86-NEXT:    retl # encoding: [0xc3]
4667;
4668; X64-LABEL: test_mask_packus_epi16_rmk_128:
4669; X64:       # %bb.0:
4670; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4671; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
4672; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4673; X64-NEXT:    retq # encoding: [0xc3]
4674  %b = load <8 x i16>, <8 x i16>* %ptr_b
4675  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
4676  ret <16 x i8> %res
4677}
4678
4679define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
4680; X86-LABEL: test_mask_packus_epi16_rmkz_128:
4681; X86:       # %bb.0:
4682; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4683; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
4684; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
4685; X86-NEXT:    retl # encoding: [0xc3]
4686;
4687; X64-LABEL: test_mask_packus_epi16_rmkz_128:
4688; X64:       # %bb.0:
4689; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4690; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
4691; X64-NEXT:    retq # encoding: [0xc3]
4692  %b = load <8 x i16>, <8 x i16>* %ptr_b
4693  %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
4694  ret <16 x i8> %res
4695}
4696
4697declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
4698
4699define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
4700; CHECK-LABEL: test_mask_packus_epi16_rr_256:
4701; CHECK:       # %bb.0:
4702; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
4703; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4704  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4705  ret <32 x i8> %res
4706}
4707
4708define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
4709; X86-LABEL: test_mask_packus_epi16_rrk_256:
4710; X86:       # %bb.0:
4711; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4712; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
4713; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4714; X86-NEXT:    retl # encoding: [0xc3]
4715;
4716; X64-LABEL: test_mask_packus_epi16_rrk_256:
4717; X64:       # %bb.0:
4718; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4719; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
4720; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4721; X64-NEXT:    retq # encoding: [0xc3]
4722  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4723  ret <32 x i8> %res
4724}
4725
4726define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
4727; X86-LABEL: test_mask_packus_epi16_rrkz_256:
4728; X86:       # %bb.0:
4729; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4730; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
4731; X86-NEXT:    retl # encoding: [0xc3]
4732;
4733; X64-LABEL: test_mask_packus_epi16_rrkz_256:
4734; X64:       # %bb.0:
4735; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4736; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
4737; X64-NEXT:    retq # encoding: [0xc3]
4738  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4739  ret <32 x i8> %res
4740}
4741
4742define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
4743; X86-LABEL: test_mask_packus_epi16_rm_256:
4744; X86:       # %bb.0:
4745; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4746; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
4747; X86-NEXT:    retl # encoding: [0xc3]
4748;
4749; X64-LABEL: test_mask_packus_epi16_rm_256:
4750; X64:       # %bb.0:
4751; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
4752; X64-NEXT:    retq # encoding: [0xc3]
4753  %b = load <16 x i16>, <16 x i16>* %ptr_b
4754  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
4755  ret <32 x i8> %res
4756}
4757
4758define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
4759; X86-LABEL: test_mask_packus_epi16_rmk_256:
4760; X86:       # %bb.0:
4761; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4762; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4763; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
4764; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4765; X86-NEXT:    retl # encoding: [0xc3]
4766;
4767; X64-LABEL: test_mask_packus_epi16_rmk_256:
4768; X64:       # %bb.0:
4769; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4770; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
4771; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4772; X64-NEXT:    retq # encoding: [0xc3]
4773  %b = load <16 x i16>, <16 x i16>* %ptr_b
4774  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
4775  ret <32 x i8> %res
4776}
4777
4778define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
4779; X86-LABEL: test_mask_packus_epi16_rmkz_256:
4780; X86:       # %bb.0:
4781; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4782; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
4783; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
4784; X86-NEXT:    retl # encoding: [0xc3]
4785;
4786; X64-LABEL: test_mask_packus_epi16_rmkz_256:
4787; X64:       # %bb.0:
4788; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
4789; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
4790; X64-NEXT:    retq # encoding: [0xc3]
4791  %b = load <16 x i16>, <16 x i16>* %ptr_b
4792  %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
4793  ret <32 x i8> %res
4794}
4795
4796declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
4797
4798define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
4799; X86-LABEL: test_cmp_b_256:
4800; X86:       # %bb.0:
4801; X86-NEXT:    pushl %ebx # encoding: [0x53]
4802; X86-NEXT:    .cfi_def_cfa_offset 8
4803; X86-NEXT:    pushl %edi # encoding: [0x57]
4804; X86-NEXT:    .cfi_def_cfa_offset 12
4805; X86-NEXT:    pushl %esi # encoding: [0x56]
4806; X86-NEXT:    .cfi_def_cfa_offset 16
4807; X86-NEXT:    .cfi_offset %esi, -16
4808; X86-NEXT:    .cfi_offset %edi, -12
4809; X86-NEXT:    .cfi_offset %ebx, -8
4810; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
4811; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4812; X86-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
4813; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4814; X86-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
4815; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4816; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
4817; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4818; X86-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
4819; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4820; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
4821; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
4822; X86-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4823; X86-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
4824; X86-NEXT:    vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
4825; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
4826; X86-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
4827; X86-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
4828; X86-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4829; X86-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
4830; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4831; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4832; X86-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
4833; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4834; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4835; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4836; X86-NEXT:    popl %esi # encoding: [0x5e]
4837; X86-NEXT:    .cfi_def_cfa_offset 12
4838; X86-NEXT:    popl %edi # encoding: [0x5f]
4839; X86-NEXT:    .cfi_def_cfa_offset 8
4840; X86-NEXT:    popl %ebx # encoding: [0x5b]
4841; X86-NEXT:    .cfi_def_cfa_offset 4
4842; X86-NEXT:    retl # encoding: [0xc3]
4843;
4844; X64-LABEL: test_cmp_b_256:
4845; X64:       # %bb.0:
4846; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
4847; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
4848; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
4849; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4850; X64-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
4851; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4852; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
4853; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4854; X64-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
4855; X64-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4856; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
4857; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4858; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4859; X64-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
4860; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
4861; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
4862; X64-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
4863; X64-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
4864; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4865; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
4866; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4867; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4868; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
4869; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4870; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4871; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4872; X64-NEXT:    retq # encoding: [0xc3]
4873  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
4874  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
4875  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
4876  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
4877  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
4878  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
4879  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
4880  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
4881  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
4882  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
4883  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
4884  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
4885  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
4886  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
4887  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
4888  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
4889  ret <8 x i32> %vec7
4890}
4891
4892define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
4893; X86-LABEL: test_mask_cmp_b_256:
4894; X86:       # %bb.0:
4895; X86-NEXT:    pushl %ebp # encoding: [0x55]
4896; X86-NEXT:    .cfi_def_cfa_offset 8
4897; X86-NEXT:    pushl %ebx # encoding: [0x53]
4898; X86-NEXT:    .cfi_def_cfa_offset 12
4899; X86-NEXT:    pushl %edi # encoding: [0x57]
4900; X86-NEXT:    .cfi_def_cfa_offset 16
4901; X86-NEXT:    pushl %esi # encoding: [0x56]
4902; X86-NEXT:    .cfi_def_cfa_offset 20
4903; X86-NEXT:    .cfi_offset %esi, -20
4904; X86-NEXT:    .cfi_offset %edi, -16
4905; X86-NEXT:    .cfi_offset %ebx, -12
4906; X86-NEXT:    .cfi_offset %ebp, -8
4907; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
4908; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
4909; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
4910; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4911; X86-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
4912; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4913; X86-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
4914; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4915; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
4916; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
4917; X86-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
4918; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
4919; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
4920; X86-NEXT:    kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
4921; X86-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
4922; X86-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
4923; X86-NEXT:    vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
4924; X86-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
4925; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
4926; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
4927; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4928; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4929; X86-NEXT:    vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
4930; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4931; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4932; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4933; X86-NEXT:    popl %esi # encoding: [0x5e]
4934; X86-NEXT:    .cfi_def_cfa_offset 16
4935; X86-NEXT:    popl %edi # encoding: [0x5f]
4936; X86-NEXT:    .cfi_def_cfa_offset 12
4937; X86-NEXT:    popl %ebx # encoding: [0x5b]
4938; X86-NEXT:    .cfi_def_cfa_offset 8
4939; X86-NEXT:    popl %ebp # encoding: [0x5d]
4940; X86-NEXT:    .cfi_def_cfa_offset 4
4941; X86-NEXT:    retl # encoding: [0xc3]
4942;
4943; X64-LABEL: test_mask_cmp_b_256:
4944; X64:       # %bb.0:
4945; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4946; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
4947; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
4948; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
4949; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
4950; X64-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
4951; X64-NEXT:    kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
4952; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
4953; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
4954; X64-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
4955; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
4956; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
4957; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
4958; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
4959; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
4960; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
4961; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
4962; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
4963; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
4964; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
4965; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
4966; X64-NEXT:    vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1]
4967; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
4968; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
4969; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
4970; X64-NEXT:    retq # encoding: [0xc3]
4971  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
4972  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
4973  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
4974  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
4975  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
4976  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
4977  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
4978  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
4979  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
4980  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
4981  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
4982  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
4983  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
4984  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
4985  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
4986  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
4987  ret <8 x i32> %vec7
4988}
4989
4990declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
4991
4992define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
4993; X86-LABEL: test_ucmp_b_256:
4994; X86:       # %bb.0:
4995; X86-NEXT:    pushl %ebx # encoding: [0x53]
4996; X86-NEXT:    .cfi_def_cfa_offset 8
4997; X86-NEXT:    pushl %edi # encoding: [0x57]
4998; X86-NEXT:    .cfi_def_cfa_offset 12
4999; X86-NEXT:    pushl %esi # encoding: [0x56]
5000; X86-NEXT:    .cfi_def_cfa_offset 16
5001; X86-NEXT:    .cfi_offset %esi, -16
5002; X86-NEXT:    .cfi_offset %edi, -12
5003; X86-NEXT:    .cfi_offset %ebx, -8
5004; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
5005; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5006; X86-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
5007; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5008; X86-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
5009; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5010; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
5011; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5012; X86-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
5013; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5014; X86-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
5015; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
5016; X86-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5017; X86-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
5018; X86-NEXT:    vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
5019; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5020; X86-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
5021; X86-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
5022; X86-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5023; X86-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
5024; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5025; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5026; X86-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
5027; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5028; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5029; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5030; X86-NEXT:    popl %esi # encoding: [0x5e]
5031; X86-NEXT:    .cfi_def_cfa_offset 12
5032; X86-NEXT:    popl %edi # encoding: [0x5f]
5033; X86-NEXT:    .cfi_def_cfa_offset 8
5034; X86-NEXT:    popl %ebx # encoding: [0x5b]
5035; X86-NEXT:    .cfi_def_cfa_offset 4
5036; X86-NEXT:    retl # encoding: [0xc3]
5037;
5038; X64-LABEL: test_ucmp_b_256:
5039; X64:       # %bb.0:
5040; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
5041; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
5042; X64-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
5043; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5044; X64-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
5045; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5046; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
5047; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5048; X64-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
5049; X64-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5050; X64-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
5051; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5052; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5053; X64-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
5054; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
5055; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5056; X64-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
5057; X64-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
5058; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5059; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
5060; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5061; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5062; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
5063; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5064; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5065; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5066; X64-NEXT:    retq # encoding: [0xc3]
5067  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
5068  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
5069  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
5070  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
5071  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
5072  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
5073  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
5074  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
5075  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
5076  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
5077  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
5078  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
5079  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
5080  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
5081  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
5082  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
5083  ret <8 x i32> %vec7
5084}
5085
5086define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
5087; X86-LABEL: test_mask_ucmp_b_256:
5088; X86:       # %bb.0:
5089; X86-NEXT:    pushl %ebp # encoding: [0x55]
5090; X86-NEXT:    .cfi_def_cfa_offset 8
5091; X86-NEXT:    pushl %ebx # encoding: [0x53]
5092; X86-NEXT:    .cfi_def_cfa_offset 12
5093; X86-NEXT:    pushl %edi # encoding: [0x57]
5094; X86-NEXT:    .cfi_def_cfa_offset 16
5095; X86-NEXT:    pushl %esi # encoding: [0x56]
5096; X86-NEXT:    .cfi_def_cfa_offset 20
5097; X86-NEXT:    .cfi_offset %esi, -20
5098; X86-NEXT:    .cfi_offset %edi, -16
5099; X86-NEXT:    .cfi_offset %ebx, -12
5100; X86-NEXT:    .cfi_offset %ebp, -8
5101; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
5102; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5103; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
5104; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5105; X86-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
5106; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5107; X86-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
5108; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5109; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
5110; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
5111; X86-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
5112; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
5113; X86-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
5114; X86-NEXT:    kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
5115; X86-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
5116; X86-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
5117; X86-NEXT:    vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
5118; X86-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
5119; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
5120; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
5121; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5122; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5123; X86-NEXT:    vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
5124; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5125; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5126; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5127; X86-NEXT:    popl %esi # encoding: [0x5e]
5128; X86-NEXT:    .cfi_def_cfa_offset 16
5129; X86-NEXT:    popl %edi # encoding: [0x5f]
5130; X86-NEXT:    .cfi_def_cfa_offset 12
5131; X86-NEXT:    popl %ebx # encoding: [0x5b]
5132; X86-NEXT:    .cfi_def_cfa_offset 8
5133; X86-NEXT:    popl %ebp # encoding: [0x5d]
5134; X86-NEXT:    .cfi_def_cfa_offset 4
5135; X86-NEXT:    retl # encoding: [0xc3]
5136;
5137; X64-LABEL: test_mask_ucmp_b_256:
5138; X64:       # %bb.0:
5139; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5140; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
5141; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
5142; X64-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
5143; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5144; X64-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
5145; X64-NEXT:    kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
5146; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
5147; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
5148; X64-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
5149; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5150; X64-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
5151; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5152; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
5153; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
5154; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
5155; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
5156; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
5157; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
5158; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
5159; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
5160; X64-NEXT:    vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1]
5161; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
5162; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
5163; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
5164; X64-NEXT:    retq # encoding: [0xc3]
5165  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
5166  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
5167  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
5168  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
5169  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
5170  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
5171  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
5172  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
5173  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
5174  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
5175  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
5176  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
5177  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
5178  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
5179  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
5180  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
5181  ret <8 x i32> %vec7
5182}
5183
5184declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
5185
5186define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
5187; CHECK-LABEL: test_cmp_w_256:
5188; CHECK:       # %bb.0:
5189; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
5190; CHECK-NEXT:    vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8]
5191; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
5192; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
5193; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05]
5194; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9]
5195; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5196; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5197; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5198; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5199; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5200; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5201; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5202; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5203; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5204; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5205; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5206; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5207; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5208; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5209; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5210; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5211; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5212  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
5213  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5214  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
5215  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5216  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
5217  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5218  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
5219  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5220  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
5221  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5222  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
5223  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5224  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
5225  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5226  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
5227  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5228  ret <8 x i16> %vec7
5229}
5230
5231define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
5232; X86-LABEL: test_mask_cmp_w_256:
5233; X86:       # %bb.0:
5234; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5235; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5236; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5237; X86-NEXT:    vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
5238; X86-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
5239; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5240; X86-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
5241; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
5242; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5243; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5244; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5245; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5246; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5247; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5248; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5249; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5250; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5251; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5252; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5253; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5254; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5255; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5256; X86-NEXT:    retl # encoding: [0xc3]
5257;
5258; X64-LABEL: test_mask_cmp_w_256:
5259; X64:       # %bb.0:
5260; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5261; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5262; X64-NEXT:    vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
5263; X64-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
5264; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5265; X64-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
5266; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
5267; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5268; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5269; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5270; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5271; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5272; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5273; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5274; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5275; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5276; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5277; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5278; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5279; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5280; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5281; X64-NEXT:    retq # encoding: [0xc3]
5282  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
5283  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5284  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
5285  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5286  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
5287  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5288  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
5289  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5290  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
5291  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5292  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
5293  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5294  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
5295  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5296  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
5297  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5298  ret <8 x i16> %vec7
5299}
5300
5301declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
5302
5303define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
5304; CHECK-LABEL: test_ucmp_w_256:
5305; CHECK:       # %bb.0:
5306; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
5307; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
5308; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
5309; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
5310; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05]
5311; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06]
5312; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5313; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5314; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5315; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5316; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5317; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5318; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5319; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5320; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5321; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5322; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5323; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5324; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5325; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5326; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5327; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5328; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5329  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
5330  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5331  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
5332  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5333  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
5334  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5335  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
5336  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5337  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
5338  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5339  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
5340  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5341  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
5342  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5343  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
5344  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5345  ret <8 x i16> %vec7
5346}
5347
5348define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
5349; X86-LABEL: test_mask_ucmp_w_256:
5350; X86:       # %bb.0:
5351; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5352; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5353; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5354; X86-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
5355; X86-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
5356; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5357; X86-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
5358; X86-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
5359; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5360; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5361; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5362; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5363; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5364; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5365; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5366; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5367; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5368; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5369; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5370; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5371; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5372; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5373; X86-NEXT:    retl # encoding: [0xc3]
5374;
5375; X64-LABEL: test_mask_ucmp_w_256:
5376; X64:       # %bb.0:
5377; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5378; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
5379; X64-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
5380; X64-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
5381; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
5382; X64-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
5383; X64-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
5384; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5385; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5386; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5387; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5388; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5389; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5390; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5391; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5392; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5393; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5394; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5395; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5396; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5397; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
5398; X64-NEXT:    retq # encoding: [0xc3]
5399  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
5400  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5401  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
5402  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5403  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
5404  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5405  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
5406  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5407  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
5408  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5409  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
5410  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5411  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
5412  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5413  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
5414  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5415  ret <8 x i16> %vec7
5416}
5417
5418declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
5419
5420define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
5421; CHECK-LABEL: test_cmp_b_128:
5422; CHECK:       # %bb.0:
5423; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
5424; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8]
5425; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
5426; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
5427; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05]
5428; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9]
5429; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5430; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5431; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5432; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5433; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5434; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5435; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5436; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5437; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5438; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5439; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5440; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5441; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5442; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5443; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5444; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5445  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
5446  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5447  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
5448  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5449  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
5450  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5451  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
5452  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5453  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
5454  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5455  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
5456  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5457  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
5458  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5459  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
5460  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5461  ret <8 x i16> %vec7
5462}
5463
5464define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
5465; X86-LABEL: test_mask_cmp_b_128:
5466; X86:       # %bb.0:
5467; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5468; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5469; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5470; X86-NEXT:    vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
5471; X86-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
5472; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5473; X86-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
5474; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
5475; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5476; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5477; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5478; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5479; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5480; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5481; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5482; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5483; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5484; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5485; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5486; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5487; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5488; X86-NEXT:    retl # encoding: [0xc3]
5489;
5490; X64-LABEL: test_mask_cmp_b_128:
5491; X64:       # %bb.0:
5492; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5493; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5494; X64-NEXT:    vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
5495; X64-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
5496; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5497; X64-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
5498; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
5499; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5500; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5501; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5502; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5503; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5504; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5505; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5506; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5507; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5508; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5509; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5510; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5511; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5512; X64-NEXT:    retq # encoding: [0xc3]
5513  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
5514  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5515  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
5516  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5517  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
5518  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5519  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
5520  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5521  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
5522  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5523  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
5524  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5525  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
5526  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5527  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
5528  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5529  ret <8 x i16> %vec7
5530}
5531
5532declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
5533
5534define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
5535; CHECK-LABEL: test_ucmp_b_128:
5536; CHECK:       # %bb.0:
5537; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
5538; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
5539; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
5540; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
5541; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05]
5542; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06]
5543; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5544; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5545; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5546; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5547; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5548; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5549; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5550; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5551; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5552; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5553; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5554; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5555; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
5556; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
5557; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
5558; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5559  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
5560  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5561  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
5562  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5563  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
5564  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5565  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
5566  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5567  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
5568  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5569  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
5570  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5571  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
5572  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5573  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
5574  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5575  ret <8 x i16> %vec7
5576}
5577
5578define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
5579; X86-LABEL: test_mask_ucmp_b_128:
5580; X86:       # %bb.0:
5581; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5582; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5583; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5584; X86-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
5585; X86-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
5586; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5587; X86-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
5588; X86-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
5589; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5590; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
5591; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
5592; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
5593; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5594; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
5595; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5596; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
5597; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
5598; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
5599; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5600; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
5601; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
5602; X86-NEXT:    retl # encoding: [0xc3]
5603;
5604; X64-LABEL: test_mask_ucmp_b_128:
5605; X64:       # %bb.0:
5606; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5607; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
5608; X64-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
5609; X64-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
5610; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
5611; X64-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
5612; X64-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
5613; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5614; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
5615; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5616; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
5617; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5618; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
5619; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5620; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
5621; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
5622; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
5623; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5624; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
5625; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
5626; X64-NEXT:    retq # encoding: [0xc3]
5627  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
5628  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
5629  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
5630  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
5631  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
5632  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
5633  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
5634  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
5635  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
5636  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
5637  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
5638  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
5639  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
5640  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
5641  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
5642  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
5643  ret <8 x i16> %vec7
5644}
5645
5646declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
5647
5648define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
5649; CHECK-LABEL: test_cmp_w_128:
5650; CHECK:       # %bb.0:
5651; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
5652; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5653; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
5654; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x02]
5655; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
5656; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x05]
5657; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe1]
5658; CHECK-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5659; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5660; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5661; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5662; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5663; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5664; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5665; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5666; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5667; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5668; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5669; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5670; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
5671; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5672; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5673  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
5674  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5675  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
5676  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5677  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
5678  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5679  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
5680  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5681  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
5682  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5683  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
5684  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5685  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
5686  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5687  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
5688  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5689  ret <8 x i8> %vec7
5690}
5691
5692define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
5693; X86-LABEL: test_mask_cmp_w_128:
5694; X86:       # %bb.0:
5695; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5696; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5697; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5698; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5699; X86-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
5700; X86-NEXT:    vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
5701; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5702; X86-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
5703; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
5704; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5705; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
5706; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5707; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
5708; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5709; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5710; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5711; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5712; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5713; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5714; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5715; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5716; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5717; X86-NEXT:    retl # encoding: [0xc3]
5718;
5719; X64-LABEL: test_mask_cmp_w_128:
5720; X64:       # %bb.0:
5721; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5722; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5723; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5724; X64-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
5725; X64-NEXT:    vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
5726; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5727; X64-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
5728; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
5729; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5730; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5731; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5732; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5733; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5734; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5735; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5736; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5737; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5738; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5739; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5740; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5741; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5742; X64-NEXT:    retq # encoding: [0xc3]
5743  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
5744  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5745  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
5746  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5747  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
5748  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5749  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
5750  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5751  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
5752  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5753  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
5754  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5755  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
5756  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5757  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
5758  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5759  ret <8 x i8> %vec7
5760}
5761
5762declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
5763
5764define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
5765; CHECK-LABEL: test_ucmp_w_128:
5766; CHECK:       # %bb.0:
5767; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
5768; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5769; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01]
5770; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x02]
5771; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
5772; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x05]
5773; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x06]
5774; CHECK-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5775; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5776; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5777; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5778; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5779; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5780; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5781; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5782; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5783; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5784; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5785; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5786; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
5787; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5788; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5789  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
5790  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5791  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
5792  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5793  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
5794  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5795  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
5796  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5797  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
5798  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5799  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
5800  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5801  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
5802  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5803  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
5804  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5805  ret <8 x i8> %vec7
5806}
5807
5808define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
5809; X86-LABEL: test_mask_ucmp_w_128:
5810; X86:       # %bb.0:
5811; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
5812; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
5813; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5814; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5815; X86-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
5816; X86-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
5817; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5818; X86-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
5819; X86-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
5820; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
5821; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
5822; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
5823; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
5824; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
5825; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
5826; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
5827; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
5828; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
5829; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
5830; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
5831; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
5832; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
5833; X86-NEXT:    retl # encoding: [0xc3]
5834;
5835; X64-LABEL: test_mask_ucmp_w_128:
5836; X64:       # %bb.0:
5837; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5838; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
5839; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
5840; X64-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
5841; X64-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
5842; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
5843; X64-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
5844; X64-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
5845; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
5846; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
5847; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
5848; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
5849; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
5850; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
5851; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
5852; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
5853; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
5854; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
5855; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
5856; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
5857; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
5858; X64-NEXT:    retq # encoding: [0xc3]
5859  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
5860  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
5861  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
5862  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
5863  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
5864  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
5865  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
5866  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
5867  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
5868  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
5869  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
5870  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
5871  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
5872  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
5873  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
5874  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
5875  ret <8 x i8> %vec7
5876}
5877
5878declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
5879
5880define <16 x i8>@mm_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
5881; CHECK-LABEL: mm_avg_epu8:
5882; CHECK:       # %bb.0:
5883; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
5884; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5885  %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
5886  ret <16 x i8> %res
5887}
5888
5889define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
5890; X86-LABEL: mm_mask_avg_epu8:
5891; X86:       # %bb.0:
5892; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5893; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
5894; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5895; X86-NEXT:    retl # encoding: [0xc3]
5896;
5897; X64-LABEL: mm_mask_avg_epu8:
5898; X64:       # %bb.0:
5899; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5900; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
5901; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5902; X64-NEXT:    retq # encoding: [0xc3]
5903  %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
5904  ret <16 x i8> %res
5905}
5906
5907declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
5908
5909define <16 x i8>@test_int_x86_avx512_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1) {
5910; CHECK-LABEL: test_int_x86_avx512_pabs_b_128:
5911; CHECK:       # %bb.0:
5912; CHECK-NEXT:    vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
5913; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5914  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
5915  ret <16 x i8> %res
5916}
5917
5918define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
5919; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128:
5920; X86:       # %bb.0:
5921; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
5922; X86-NEXT:    vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
5923; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5924; X86-NEXT:    retl # encoding: [0xc3]
5925;
5926; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128:
5927; X64:       # %bb.0:
5928; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5929; X64-NEXT:    vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
5930; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
5931; X64-NEXT:    retq # encoding: [0xc3]
5932  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
5933  ret <16 x i8> %res
5934}
5935
5936declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
5937
5938define <32 x i8>@mm256_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
5939; CHECK-LABEL: mm256_avg_epu8:
5940; CHECK:       # %bb.0:
5941; CHECK-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
5942; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5943  %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
5944  ret <32 x i8> %res
5945}
5946
5947define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
5948; X86-LABEL: mm256_mask_avg_epu8:
5949; X86:       # %bb.0:
5950; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
5951; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
5952; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5953; X86-NEXT:    retl # encoding: [0xc3]
5954;
5955; X64-LABEL: mm256_mask_avg_epu8:
5956; X64:       # %bb.0:
5957; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5958; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
5959; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
5960; X64-NEXT:    retq # encoding: [0xc3]
5961  %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
5962  ret <32 x i8> %res
5963}
5964
5965declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
5966
5967define <32 x i8>@test_int_x86_avx512_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1) {
5968; CHECK-LABEL: test_int_x86_avx512_pabs_b_256:
5969; CHECK:       # %bb.0:
5970; CHECK-NEXT:    vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
5971; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
5972  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
5973  ret <32 x i8> %res
5974}
5975
5976define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
5977; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256:
5978; X86:       # %bb.0:
5979; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
5980; X86-NEXT:    vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
5981; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5982; X86-NEXT:    retl # encoding: [0xc3]
5983;
5984; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256:
5985; X64:       # %bb.0:
5986; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
5987; X64-NEXT:    vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
5988; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
5989; X64-NEXT:    retq # encoding: [0xc3]
5990  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
5991  ret <32 x i8> %res
5992}
5993
5994declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
5995
5996define <8 x i16>@mm_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
5997; CHECK-LABEL: mm_avg_epu16:
5998; CHECK:       # %bb.0:
5999; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
6000; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6001  %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6002  ret <8 x i16> %res
6003}
6004
6005define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6006; X86-LABEL: mm_mask_avg_epu16:
6007; X86:       # %bb.0:
6008; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6009; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6010; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
6011; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6012; X86-NEXT:    retl # encoding: [0xc3]
6013;
6014; X64-LABEL: mm_mask_avg_epu16:
6015; X64:       # %bb.0:
6016; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6017; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
6018; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6019; X64-NEXT:    retq # encoding: [0xc3]
6020  %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6021  ret <8 x i16> %res
6022}
6023
6024declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
6025
6026define <8 x i16>@test_int_x86_avx512_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1) {
6027; CHECK-LABEL: test_int_x86_avx512_pabs_w_128:
6028; CHECK:       # %bb.0:
6029; CHECK-NEXT:    vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
6030; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6031  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
6032  ret <8 x i16> %res
6033}
6034
6035define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6036; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128:
6037; X86:       # %bb.0:
6038; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6039; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6040; X86-NEXT:    vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
6041; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6042; X86-NEXT:    retl # encoding: [0xc3]
6043;
6044; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128:
6045; X64:       # %bb.0:
6046; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6047; X64-NEXT:    vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
6048; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6049; X64-NEXT:    retq # encoding: [0xc3]
6050  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6051  ret <8 x i16> %res
6052}
6053
6054declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6055
6056define <16 x i16>@mm256_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6057; CHECK-LABEL: mm256_avg_epu16:
6058; CHECK:       # %bb.0:
6059; CHECK-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
6060; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6061  %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6062  ret <16 x i16> %res
6063}
6064
6065define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6066; X86-LABEL: mm256_mask_avg_epu16:
6067; X86:       # %bb.0:
6068; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6069; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
6070; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6071; X86-NEXT:    retl # encoding: [0xc3]
6072;
6073; X64-LABEL: mm256_mask_avg_epu16:
6074; X64:       # %bb.0:
6075; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6076; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
6077; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6078; X64-NEXT:    retq # encoding: [0xc3]
6079  %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6080  ret <16 x i16> %res
6081}
6082
6083declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
6084
6085define <16 x i16>@test_int_x86_avx512_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1) {
6086; CHECK-LABEL: test_int_x86_avx512_pabs_w_256:
6087; CHECK:       # %bb.0:
6088; CHECK-NEXT:    vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
6089; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6090  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
6091  ret <16 x i16> %res
6092}
6093
6094define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6095; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256:
6096; X86:       # %bb.0:
6097; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6098; X86-NEXT:    vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
6099; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6100; X86-NEXT:    retl # encoding: [0xc3]
6101;
6102; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256:
6103; X64:       # %bb.0:
6104; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6105; X64-NEXT:    vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
6106; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6107; X64-NEXT:    retq # encoding: [0xc3]
6108  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6109  ret <16 x i16> %res
6110}
6111
6112declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6113
6114declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
6115
6116define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
6117; X86-LABEL: test_int_x86_avx512_ptestm_b_128:
6118; X86:       # %bb.0:
6119; X86-NEXT:    vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
6120; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6121; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6122; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6123; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6124; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6125; X86-NEXT:    retl # encoding: [0xc3]
6126;
6127; X64-LABEL: test_int_x86_avx512_ptestm_b_128:
6128; X64:       # %bb.0:
6129; X64-NEXT:    vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
6130; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6131; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6132; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6133; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6134; X64-NEXT:    retq # encoding: [0xc3]
6135  %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
6136  %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
6137  %res2 = add i16 %res, %res1
6138  ret i16 %res2
6139}
6140
6141declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
6142
6143define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
6144; X86-LABEL: test_int_x86_avx512_ptestm_b_256:
6145; X86:       # %bb.0:
6146; X86-NEXT:    vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
6147; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6148; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6149; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
6150; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6151; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6152; X86-NEXT:    retl # encoding: [0xc3]
6153;
6154; X64-LABEL: test_int_x86_avx512_ptestm_b_256:
6155; X64:       # %bb.0:
6156; X64-NEXT:    vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
6157; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6158; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6159; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6160; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6161; X64-NEXT:    retq # encoding: [0xc3]
6162  %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
6163  %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
6164  %res2 = add i32 %res, %res1
6165  ret i32 %res2
6166}
6167
6168declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
6169
6170define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6171; X86-LABEL: test_int_x86_avx512_ptestm_w_128:
6172; X86:       # %bb.0:
6173; X86-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
6174; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6175; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
6176; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
6177; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
6178; X86-NEXT:    retl # encoding: [0xc3]
6179;
6180; X64-LABEL: test_int_x86_avx512_ptestm_w_128:
6181; X64:       # %bb.0:
6182; X64-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
6183; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6184; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
6185; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
6186; X64-NEXT:    # kill: def $al killed $al killed $eax
6187; X64-NEXT:    retq # encoding: [0xc3]
6188  %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6189  %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
6190  %res2 = add i8 %res, %res1
6191  ret i8 %res2
6192}
6193
6194declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
6195
6196define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6197; X86-LABEL: test_int_x86_avx512_ptestm_w_256:
6198; X86:       # %bb.0:
6199; X86-NEXT:    vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
6200; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6201; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6202; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6203; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6204; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6205; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6206; X86-NEXT:    retl # encoding: [0xc3]
6207;
6208; X64-LABEL: test_int_x86_avx512_ptestm_w_256:
6209; X64:       # %bb.0:
6210; X64-NEXT:    vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
6211; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6212; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6213; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6214; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6215; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6216; X64-NEXT:    retq # encoding: [0xc3]
6217  %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6218  %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
6219  %res2 = add i16 %res, %res1
6220  ret i16 %res2
6221}
6222
6223declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
6224
6225define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
6226; X86-LABEL: test_int_x86_avx512_ptestnm_b_128:
6227; X86:       # %bb.0:
6228; X86-NEXT:    vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
6229; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6230; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6231; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6232; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6233; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6234; X86-NEXT:    retl # encoding: [0xc3]
6235;
6236; X64-LABEL: test_int_x86_avx512_ptestnm_b_128:
6237; X64:       # %bb.0:
6238; X64-NEXT:    vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
6239; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6240; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6241; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6242; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6243; X64-NEXT:    retq # encoding: [0xc3]
6244  %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
6245  %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
6246  %res2 = add i16 %res, %res1
6247  ret i16 %res2
6248}
6249
6250declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
6251
6252define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
6253; X86-LABEL: test_int_x86_avx512_ptestnm_b_256:
6254; X86:       # %bb.0:
6255; X86-NEXT:    vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
6256; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6257; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
6258; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
6259; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6260; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6261; X86-NEXT:    retl # encoding: [0xc3]
6262;
6263; X64-LABEL: test_int_x86_avx512_ptestnm_b_256:
6264; X64:       # %bb.0:
6265; X64-NEXT:    vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
6266; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6267; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6268; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6269; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6270; X64-NEXT:    retq # encoding: [0xc3]
6271  %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
6272  %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
6273  %res2 = add i32 %res, %res1
6274  ret i32 %res2
6275}
6276
6277declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2)
6278
6279define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
6280; X86-LABEL: test_int_x86_avx512_ptestnm_w_128:
6281; X86:       # %bb.0:
6282; X86-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
6283; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6284; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
6285; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
6286; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
6287; X86-NEXT:    retl # encoding: [0xc3]
6288;
6289; X64-LABEL: test_int_x86_avx512_ptestnm_w_128:
6290; X64:       # %bb.0:
6291; X64-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
6292; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6293; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
6294; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
6295; X64-NEXT:    # kill: def $al killed $al killed $eax
6296; X64-NEXT:    retq # encoding: [0xc3]
6297  %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
6298  %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
6299  %res2 = add i8 %res, %res1
6300  ret i8 %res2
6301}
6302
6303declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2)
6304
6305define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
6306; X86-LABEL: test_int_x86_avx512_ptestnm_w_256:
6307; X86:       # %bb.0:
6308; X86-NEXT:    vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
6309; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
6310; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
6311; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
6312; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
6313; X86-NEXT:    # kill: def $ax killed $ax killed $eax
6314; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6315; X86-NEXT:    retl # encoding: [0xc3]
6316;
6317; X64-LABEL: test_int_x86_avx512_ptestnm_w_256:
6318; X64:       # %bb.0:
6319; X64-NEXT:    vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
6320; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6321; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
6322; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
6323; X64-NEXT:    # kill: def $ax killed $ax killed $eax
6324; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6325; X64-NEXT:    retq # encoding: [0xc3]
6326  %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
6327  %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
6328  %res2 = add i16 %res, %res1
6329  ret i16 %res2
6330}
6331
6332declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
6333
6334define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
6335; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
6336; CHECK:       # %bb.0:
6337; CHECK-NEXT:    vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
6338; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
6339; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6340    %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
6341    ret i16 %res
6342}
6343
6344declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
6345
6346define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
6347; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
6348; CHECK:       # %bb.0:
6349; CHECK-NEXT:    vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
6350; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6351; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6352    %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
6353    ret i32 %res
6354}
6355
6356declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
6357
6358define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
6359; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
6360; CHECK:       # %bb.0:
6361; CHECK-NEXT:    vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
6362; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6363; CHECK-NEXT:    # kill: def $al killed $al killed $eax
6364; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6365    %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
6366    ret i8 %res
6367}
6368
6369declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
6370
6371define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
6372; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
6373; CHECK:       # %bb.0:
6374; CHECK-NEXT:    vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
6375; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
6376; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
6377; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
6378; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6379    %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
6380    ret i16 %res
6381}
6382
6383declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6384
6385define <8 x i16>@test_int_x86_avx512_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6386; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_128:
6387; CHECK:       # %bb.0:
6388; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
6389; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6390  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6391  ret <8 x i16> %res
6392}
6393
6394define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6395; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
6396; X86:       # %bb.0:
6397; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6398; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6399; X86-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
6400; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6401; X86-NEXT:    retl # encoding: [0xc3]
6402;
6403; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
6404; X64:       # %bb.0:
6405; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6406; X64-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
6407; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6408; X64-NEXT:    retq # encoding: [0xc3]
6409  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6410  ret <8 x i16> %res
6411}
6412
6413declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6414
6415define <16 x i16>@test_int_x86_avx512_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6416; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_256:
6417; CHECK:       # %bb.0:
6418; CHECK-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
6419; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6420  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6421  ret <16 x i16> %res
6422}
6423
6424define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6425; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
6426; X86:       # %bb.0:
6427; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6428; X86-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
6429; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6430; X86-NEXT:    retl # encoding: [0xc3]
6431;
6432; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
6433; X64:       # %bb.0:
6434; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6435; X64-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
6436; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6437; X64-NEXT:    retq # encoding: [0xc3]
6438  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6439  ret <16 x i16> %res
6440}
6441
6442declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6443
6444define <8 x i16>@test_int_x86_avx512_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6445; CHECK-LABEL: test_int_x86_avx512_pmulh_w_128:
6446; CHECK:       # %bb.0:
6447; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
6448; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6449  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6450  ret <8 x i16> %res
6451}
6452
6453define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6454; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
6455; X86:       # %bb.0:
6456; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6457; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6458; X86-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
6459; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6460; X86-NEXT:    retl # encoding: [0xc3]
6461;
6462; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
6463; X64:       # %bb.0:
6464; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6465; X64-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
6466; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6467; X64-NEXT:    retq # encoding: [0xc3]
6468  %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6469  ret <8 x i16> %res
6470}
6471
6472declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6473
6474define <16 x i16>@test_int_x86_avx512_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6475; CHECK-LABEL: test_int_x86_avx512_pmulh_w_256:
6476; CHECK:       # %bb.0:
6477; CHECK-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
6478; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6479  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6480  ret <16 x i16> %res
6481}
6482
6483define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6484; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
6485; X86:       # %bb.0:
6486; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6487; X86-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
6488; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6489; X86-NEXT:    retl # encoding: [0xc3]
6490;
6491; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
6492; X64:       # %bb.0:
6493; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6494; X64-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
6495; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6496; X64-NEXT:    retq # encoding: [0xc3]
6497  %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6498  ret <16 x i16> %res
6499}
6500
6501declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6502
6503define <8 x i16>@test_int_x86_avx512_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6504; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_128:
6505; CHECK:       # %bb.0:
6506; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
6507; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6508  %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6509  ret <8 x i16> %res
6510}
6511
6512define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6513; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
6514; X86:       # %bb.0:
6515; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6516; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6517; X86-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
6518; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6519; X86-NEXT:    retl # encoding: [0xc3]
6520;
6521; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
6522; X64:       # %bb.0:
6523; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6524; X64-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
6525; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6526; X64-NEXT:    retq # encoding: [0xc3]
6527  %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6528  ret <8 x i16> %res
6529}
6530
6531declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6532
6533define <16 x i16>@test_int_x86_avx512_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6534; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_256:
6535; CHECK:       # %bb.0:
6536; CHECK-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
6537; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6538  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6539  ret <16 x i16> %res
6540}
6541
6542define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6543; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
6544; X86:       # %bb.0:
6545; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6546; X86-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
6547; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6548; X86-NEXT:    retl # encoding: [0xc3]
6549;
6550; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
6551; X64:       # %bb.0:
6552; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6553; X64-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
6554; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6555; X64-NEXT:    retq # encoding: [0xc3]
6556  %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6557  ret <16 x i16> %res
6558}
6559
6560declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
6561
6562define <8 x i16>@test_int_x86_avx512_ask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2) {
6563; CHECK-LABEL: test_int_x86_avx512_ask_pmaddubs_w_128:
6564; CHECK:       # %bb.0:
6565; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
6566; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6567  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
6568  ret <8 x i16> %res
6569}
6570
6571define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
6572; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
6573; X86:       # %bb.0:
6574; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6575; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6576; X86-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
6577; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6578; X86-NEXT:    retl # encoding: [0xc3]
6579;
6580; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
6581; X64:       # %bb.0:
6582; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6583; X64-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
6584; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6585; X64-NEXT:    retq # encoding: [0xc3]
6586  %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
6587  ret <8 x i16> %res
6588}
6589
6590declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
6591
6592define <16 x i16>@test_int_x86_avx512_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2) {
6593; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_256:
6594; CHECK:       # %bb.0:
6595; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
6596; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6597  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
6598  ret <16 x i16> %res
6599}
6600
6601define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
6602; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
6603; X86:       # %bb.0:
6604; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6605; X86-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
6606; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6607; X86-NEXT:    retl # encoding: [0xc3]
6608;
6609; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
6610; X64:       # %bb.0:
6611; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6612; X64-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
6613; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6614; X64-NEXT:    retq # encoding: [0xc3]
6615  %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
6616  ret <16 x i16> %res
6617}
6618
6619declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
6620
6621define <4 x i32>@test_int_x86_avx512_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2) {
6622; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_128:
6623; CHECK:       # %bb.0:
6624; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
6625; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6626  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
6627  ret <4 x i32> %res
6628}
6629
6630define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
6631; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
6632; X86:       # %bb.0:
6633; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6634; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6635; X86-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
6636; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6637; X86-NEXT:    retl # encoding: [0xc3]
6638;
6639; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
6640; X64:       # %bb.0:
6641; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6642; X64-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
6643; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6644; X64-NEXT:    retq # encoding: [0xc3]
6645  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
6646  ret <4 x i32> %res
6647}
6648
6649declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
6650
6651define <8 x i32>@test_int_x86_avx512_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2) {
6652; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_256:
6653; CHECK:       # %bb.0:
6654; CHECK-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
6655; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6656  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
6657  ret <8 x i32> %res
6658}
6659
6660define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
6661; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
6662; X86:       # %bb.0:
6663; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6664; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6665; X86-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
6666; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6667; X86-NEXT:    retl # encoding: [0xc3]
6668;
6669; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
6670; X64:       # %bb.0:
6671; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6672; X64-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
6673; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6674; X64-NEXT:    retq # encoding: [0xc3]
6675  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
6676  ret <8 x i32> %res
6677}
6678
6679declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6680
6681define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6682; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
6683; CHECK:       # %bb.0:
6684; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
6685; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6686  %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6687  ret <8 x i16> %res
6688}
6689
6690define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6691; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
6692; X86:       # %bb.0:
6693; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6694; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6695; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
6696; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6697; X86-NEXT:    retl # encoding: [0xc3]
6698;
6699; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
6700; X64:       # %bb.0:
6701; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6702; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
6703; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
6704; X64-NEXT:    retq # encoding: [0xc3]
6705  %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6706  ret <8 x i16> %res
6707}
6708
6709define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
6710; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
6711; X86:       # %bb.0:
6712; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6713; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6714; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
6715; X86-NEXT:    retl # encoding: [0xc3]
6716;
6717; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
6718; X64:       # %bb.0:
6719; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6720; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
6721; X64-NEXT:    retq # encoding: [0xc3]
6722  %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
6723  ret <8 x i16> %res
6724}
6725
6726declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6727
6728define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6729; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
6730; CHECK:       # %bb.0:
6731; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
6732; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6733  %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6734  ret <16 x i16> %res
6735}
6736
6737define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6738; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
6739; X86:       # %bb.0:
6740; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6741; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
6742; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6743; X86-NEXT:    retl # encoding: [0xc3]
6744;
6745; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
6746; X64:       # %bb.0:
6747; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6748; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
6749; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
6750; X64-NEXT:    retq # encoding: [0xc3]
6751  %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6752  ret <16 x i16> %res
6753}
6754
6755define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
6756; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
6757; X86:       # %bb.0:
6758; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6759; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
6760; X86-NEXT:    retl # encoding: [0xc3]
6761;
6762; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
6763; X64:       # %bb.0:
6764; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6765; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
6766; X64-NEXT:    retq # encoding: [0xc3]
6767  %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
6768  ret <16 x i16> %res
6769}
6770
6771declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6772
6773define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6774; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
6775; CHECK:       # %bb.0:
6776; CHECK-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
6777; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6778  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6779  ret <8 x i16> %res
6780}
6781
6782define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6783; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
6784; X86:       # %bb.0:
6785; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6786; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6787; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
6788; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6789; X86-NEXT:    retl # encoding: [0xc3]
6790;
6791; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
6792; X64:       # %bb.0:
6793; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6794; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
6795; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6796; X64-NEXT:    retq # encoding: [0xc3]
6797  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6798  ret <8 x i16> %res
6799}
6800
6801declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6802
6803define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6804; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
6805; X86:       # %bb.0:
6806; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6807; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6808; X86-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
6809; X86-NEXT:    retl # encoding: [0xc3]
6810;
6811; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
6812; X64:       # %bb.0:
6813; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6814; X64-NEXT:    vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
6815; X64-NEXT:    retq # encoding: [0xc3]
6816  %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6817  ret <8 x i16> %res
6818}
6819
6820declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6821
6822define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6823; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
6824; CHECK:       # %bb.0:
6825; CHECK-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
6826; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6827  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6828  ret <16 x i16> %res
6829}
6830
6831define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6832; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
6833; X86:       # %bb.0:
6834; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6835; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
6836; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6837; X86-NEXT:    retl # encoding: [0xc3]
6838;
6839; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
6840; X64:       # %bb.0:
6841; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6842; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
6843; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6844; X64-NEXT:    retq # encoding: [0xc3]
6845  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6846  ret <16 x i16> %res
6847}
6848
6849declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6850
6851define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6852; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
6853; X86:       # %bb.0:
6854; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6855; X86-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
6856; X86-NEXT:    retl # encoding: [0xc3]
6857;
6858; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
6859; X64:       # %bb.0:
6860; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6861; X64-NEXT:    vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
6862; X64-NEXT:    retq # encoding: [0xc3]
6863  %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6864  ret <16 x i16> %res
6865}
6866
6867declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
6868
6869define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
6870; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
6871; CHECK:       # %bb.0:
6872; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
6873; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6874  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
6875  ret <8 x i16> %res
6876}
6877
6878define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
6879; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
6880; X86:       # %bb.0:
6881; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6882; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6883; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
6884; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6885; X86-NEXT:    retl # encoding: [0xc3]
6886;
6887; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
6888; X64:       # %bb.0:
6889; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6890; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
6891; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
6892; X64-NEXT:    retq # encoding: [0xc3]
6893  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
6894  ret <8 x i16> %res
6895}
6896
6897declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
6898
6899define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
6900; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256:
6901; CHECK:       # %bb.0:
6902; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2]
6903; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6904  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
6905  ret <16 x i16> %res
6906}
6907
6908define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
6909; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
6910; X86:       # %bb.0:
6911; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6912; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
6913; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6914; X86-NEXT:    retl # encoding: [0xc3]
6915;
6916; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
6917; X64:       # %bb.0:
6918; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6919; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
6920; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
6921; X64-NEXT:    retq # encoding: [0xc3]
6922  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
6923  ret <16 x i16> %res
6924}
6925
6926declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
6927
6928define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
6929; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
6930; X86:       # %bb.0:
6931; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6932; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
6933; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
6934; X86-NEXT:    vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
6935; X86-NEXT:    vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
6936; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
6937; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
6938; X86-NEXT:    retl # encoding: [0xc3]
6939;
6940; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
6941; X64:       # %bb.0:
6942; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6943; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
6944; X64-NEXT:    vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
6945; X64-NEXT:    vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
6946; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
6947; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
6948; X64-NEXT:    retq # encoding: [0xc3]
6949  %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
6950  %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4)
6951  %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1)
6952  %res3 = add <8 x i16> %res, %res1
6953  %res4 = add <8 x i16> %res2, %res3
6954  ret <8 x i16> %res4
6955}
6956
6957declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
6958
6959define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
6960; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
6961; X86:       # %bb.0:
6962; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
6963; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
6964; X86-NEXT:    vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
6965; X86-NEXT:    vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
6966; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
6967; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
6968; X86-NEXT:    retl # encoding: [0xc3]
6969;
6970; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
6971; X64:       # %bb.0:
6972; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
6973; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
6974; X64-NEXT:    vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
6975; X64-NEXT:    vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
6976; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
6977; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
6978; X64-NEXT:    retq # encoding: [0xc3]
6979  %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
6980  %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4)
6981  %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1)
6982  %res3 = add <16 x i16> %res, %res1
6983  %res4 = add <16 x i16> %res3, %res2
6984  ret <16 x i16> %res4
6985}
6986
6987define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
6988; CHECK-LABEL: test_mask_adds_epu16_rr_128:
6989; CHECK:       # %bb.0:
6990; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
6991; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
6992  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
6993  ret <8 x i16> %res
6994}
6995
6996define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
6997; X86-LABEL: test_mask_adds_epu16_rrk_128:
6998; X86:       # %bb.0:
6999; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7000; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7001; X86-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
7002; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7003; X86-NEXT:    retl # encoding: [0xc3]
7004;
7005; X64-LABEL: test_mask_adds_epu16_rrk_128:
7006; X64:       # %bb.0:
7007; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7008; X64-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
7009; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7010; X64-NEXT:    retq # encoding: [0xc3]
7011  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7012  ret <8 x i16> %res
7013}
7014
7015define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7016; X86-LABEL: test_mask_adds_epu16_rrkz_128:
7017; X86:       # %bb.0:
7018; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7019; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7020; X86-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
7021; X86-NEXT:    retl # encoding: [0xc3]
7022;
7023; X64-LABEL: test_mask_adds_epu16_rrkz_128:
7024; X64:       # %bb.0:
7025; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7026; X64-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
7027; X64-NEXT:    retq # encoding: [0xc3]
7028  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7029  ret <8 x i16> %res
7030}
7031
7032define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
7033; X86-LABEL: test_mask_adds_epu16_rm_128:
7034; X86:       # %bb.0:
7035; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7036; X86-NEXT:    vpaddusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x00]
7037; X86-NEXT:    retl # encoding: [0xc3]
7038;
7039; X64-LABEL: test_mask_adds_epu16_rm_128:
7040; X64:       # %bb.0:
7041; X64-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
7042; X64-NEXT:    retq # encoding: [0xc3]
7043  %b = load <8 x i16>, <8 x i16>* %ptr_b
7044  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7045  ret <8 x i16> %res
7046}
7047
7048define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
7049; X86-LABEL: test_mask_adds_epu16_rmk_128:
7050; X86:       # %bb.0:
7051; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7052; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7053; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7054; X86-NEXT:    vpaddusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x08]
7055; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7056; X86-NEXT:    retl # encoding: [0xc3]
7057;
7058; X64-LABEL: test_mask_adds_epu16_rmk_128:
7059; X64:       # %bb.0:
7060; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7061; X64-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
7062; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7063; X64-NEXT:    retq # encoding: [0xc3]
7064  %b = load <8 x i16>, <8 x i16>* %ptr_b
7065  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7066  ret <8 x i16> %res
7067}
7068
7069define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
7070; X86-LABEL: test_mask_adds_epu16_rmkz_128:
7071; X86:       # %bb.0:
7072; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7073; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7074; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7075; X86-NEXT:    vpaddusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x00]
7076; X86-NEXT:    retl # encoding: [0xc3]
7077;
7078; X64-LABEL: test_mask_adds_epu16_rmkz_128:
7079; X64:       # %bb.0:
7080; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7081; X64-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
7082; X64-NEXT:    retq # encoding: [0xc3]
7083  %b = load <8 x i16>, <8 x i16>* %ptr_b
7084  %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7085  ret <8 x i16> %res
7086}
7087
7088declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7089
7090define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7091; CHECK-LABEL: test_mask_adds_epu16_rr_256:
7092; CHECK:       # %bb.0:
7093; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
7094; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7095  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7096  ret <16 x i16> %res
7097}
7098
7099define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7100; X86-LABEL: test_mask_adds_epu16_rrk_256:
7101; X86:       # %bb.0:
7102; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7103; X86-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
7104; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7105; X86-NEXT:    retl # encoding: [0xc3]
7106;
7107; X64-LABEL: test_mask_adds_epu16_rrk_256:
7108; X64:       # %bb.0:
7109; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7110; X64-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
7111; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7112; X64-NEXT:    retq # encoding: [0xc3]
7113  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7114  ret <16 x i16> %res
7115}
7116
7117define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7118; X86-LABEL: test_mask_adds_epu16_rrkz_256:
7119; X86:       # %bb.0:
7120; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7121; X86-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
7122; X86-NEXT:    retl # encoding: [0xc3]
7123;
7124; X64-LABEL: test_mask_adds_epu16_rrkz_256:
7125; X64:       # %bb.0:
7126; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7127; X64-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
7128; X64-NEXT:    retq # encoding: [0xc3]
7129  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7130  ret <16 x i16> %res
7131}
7132
7133define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
7134; X86-LABEL: test_mask_adds_epu16_rm_256:
7135; X86:       # %bb.0:
7136; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7137; X86-NEXT:    vpaddusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x00]
7138; X86-NEXT:    retl # encoding: [0xc3]
7139;
7140; X64-LABEL: test_mask_adds_epu16_rm_256:
7141; X64:       # %bb.0:
7142; X64-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
7143; X64-NEXT:    retq # encoding: [0xc3]
7144  %b = load <16 x i16>, <16 x i16>* %ptr_b
7145  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7146  ret <16 x i16> %res
7147}
7148
7149define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
7150; X86-LABEL: test_mask_adds_epu16_rmk_256:
7151; X86:       # %bb.0:
7152; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7153; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7154; X86-NEXT:    vpaddusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x08]
7155; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7156; X86-NEXT:    retl # encoding: [0xc3]
7157;
7158; X64-LABEL: test_mask_adds_epu16_rmk_256:
7159; X64:       # %bb.0:
7160; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7161; X64-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
7162; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7163; X64-NEXT:    retq # encoding: [0xc3]
7164  %b = load <16 x i16>, <16 x i16>* %ptr_b
7165  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7166  ret <16 x i16> %res
7167}
7168
7169define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
7170; X86-LABEL: test_mask_adds_epu16_rmkz_256:
7171; X86:       # %bb.0:
7172; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7173; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7174; X86-NEXT:    vpaddusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x00]
7175; X86-NEXT:    retl # encoding: [0xc3]
7176;
7177; X64-LABEL: test_mask_adds_epu16_rmkz_256:
7178; X64:       # %bb.0:
7179; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7180; X64-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
7181; X64-NEXT:    retq # encoding: [0xc3]
7182  %b = load <16 x i16>, <16 x i16>* %ptr_b
7183  %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7184  ret <16 x i16> %res
7185}
7186
7187declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7188
7189define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
7190; CHECK-LABEL: test_mask_subs_epu16_rr_128:
7191; CHECK:       # %bb.0:
7192; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
7193; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7194  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7195  ret <8 x i16> %res
7196}
7197
7198define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7199; X86-LABEL: test_mask_subs_epu16_rrk_128:
7200; X86:       # %bb.0:
7201; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7202; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7203; X86-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
7204; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7205; X86-NEXT:    retl # encoding: [0xc3]
7206;
7207; X64-LABEL: test_mask_subs_epu16_rrk_128:
7208; X64:       # %bb.0:
7209; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7210; X64-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
7211; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7212; X64-NEXT:    retq # encoding: [0xc3]
7213  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7214  ret <8 x i16> %res
7215}
7216
7217define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7218; X86-LABEL: test_mask_subs_epu16_rrkz_128:
7219; X86:       # %bb.0:
7220; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7221; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7222; X86-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
7223; X86-NEXT:    retl # encoding: [0xc3]
7224;
7225; X64-LABEL: test_mask_subs_epu16_rrkz_128:
7226; X64:       # %bb.0:
7227; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7228; X64-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
7229; X64-NEXT:    retq # encoding: [0xc3]
7230  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7231  ret <8 x i16> %res
7232}
7233
7234define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
7235; X86-LABEL: test_mask_subs_epu16_rm_128:
7236; X86:       # %bb.0:
7237; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7238; X86-NEXT:    vpsubusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x00]
7239; X86-NEXT:    retl # encoding: [0xc3]
7240;
7241; X64-LABEL: test_mask_subs_epu16_rm_128:
7242; X64:       # %bb.0:
7243; X64-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
7244; X64-NEXT:    retq # encoding: [0xc3]
7245  %b = load <8 x i16>, <8 x i16>* %ptr_b
7246  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7247  ret <8 x i16> %res
7248}
7249
7250define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
7251; X86-LABEL: test_mask_subs_epu16_rmk_128:
7252; X86:       # %bb.0:
7253; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7254; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7255; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7256; X86-NEXT:    vpsubusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x08]
7257; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7258; X86-NEXT:    retl # encoding: [0xc3]
7259;
7260; X64-LABEL: test_mask_subs_epu16_rmk_128:
7261; X64:       # %bb.0:
7262; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7263; X64-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
7264; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7265; X64-NEXT:    retq # encoding: [0xc3]
7266  %b = load <8 x i16>, <8 x i16>* %ptr_b
7267  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7268  ret <8 x i16> %res
7269}
7270
7271define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
7272; X86-LABEL: test_mask_subs_epu16_rmkz_128:
7273; X86:       # %bb.0:
7274; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7275; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7276; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7277; X86-NEXT:    vpsubusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x00]
7278; X86-NEXT:    retl # encoding: [0xc3]
7279;
7280; X64-LABEL: test_mask_subs_epu16_rmkz_128:
7281; X64:       # %bb.0:
7282; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7283; X64-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
7284; X64-NEXT:    retq # encoding: [0xc3]
7285  %b = load <8 x i16>, <8 x i16>* %ptr_b
7286  %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7287  ret <8 x i16> %res
7288}
7289
7290declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7291
7292define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7293; CHECK-LABEL: test_mask_subs_epu16_rr_256:
7294; CHECK:       # %bb.0:
7295; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
7296; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7297  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7298  ret <16 x i16> %res
7299}
7300
7301define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7302; X86-LABEL: test_mask_subs_epu16_rrk_256:
7303; X86:       # %bb.0:
7304; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7305; X86-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
7306; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7307; X86-NEXT:    retl # encoding: [0xc3]
7308;
7309; X64-LABEL: test_mask_subs_epu16_rrk_256:
7310; X64:       # %bb.0:
7311; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7312; X64-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
7313; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7314; X64-NEXT:    retq # encoding: [0xc3]
7315  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7316  ret <16 x i16> %res
7317}
7318
7319define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7320; X86-LABEL: test_mask_subs_epu16_rrkz_256:
7321; X86:       # %bb.0:
7322; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7323; X86-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
7324; X86-NEXT:    retl # encoding: [0xc3]
7325;
7326; X64-LABEL: test_mask_subs_epu16_rrkz_256:
7327; X64:       # %bb.0:
7328; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7329; X64-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
7330; X64-NEXT:    retq # encoding: [0xc3]
7331  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7332  ret <16 x i16> %res
7333}
7334
7335define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
7336; X86-LABEL: test_mask_subs_epu16_rm_256:
7337; X86:       # %bb.0:
7338; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7339; X86-NEXT:    vpsubusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x00]
7340; X86-NEXT:    retl # encoding: [0xc3]
7341;
7342; X64-LABEL: test_mask_subs_epu16_rm_256:
7343; X64:       # %bb.0:
7344; X64-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
7345; X64-NEXT:    retq # encoding: [0xc3]
7346  %b = load <16 x i16>, <16 x i16>* %ptr_b
7347  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7348  ret <16 x i16> %res
7349}
7350
7351define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
7352; X86-LABEL: test_mask_subs_epu16_rmk_256:
7353; X86:       # %bb.0:
7354; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7355; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7356; X86-NEXT:    vpsubusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x08]
7357; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7358; X86-NEXT:    retl # encoding: [0xc3]
7359;
7360; X64-LABEL: test_mask_subs_epu16_rmk_256:
7361; X64:       # %bb.0:
7362; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7363; X64-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
7364; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7365; X64-NEXT:    retq # encoding: [0xc3]
7366  %b = load <16 x i16>, <16 x i16>* %ptr_b
7367  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7368  ret <16 x i16> %res
7369}
7370
7371define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
7372; X86-LABEL: test_mask_subs_epu16_rmkz_256:
7373; X86:       # %bb.0:
7374; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7375; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7376; X86-NEXT:    vpsubusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x00]
7377; X86-NEXT:    retl # encoding: [0xc3]
7378;
7379; X64-LABEL: test_mask_subs_epu16_rmkz_256:
7380; X64:       # %bb.0:
7381; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7382; X64-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
7383; X64-NEXT:    retq # encoding: [0xc3]
7384  %b = load <16 x i16>, <16 x i16>* %ptr_b
7385  %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7386  ret <16 x i16> %res
7387}
7388
7389declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7390
7391define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
7392; CHECK-LABEL: test_mask_adds_epu8_rr_128:
7393; CHECK:       # %bb.0:
7394; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
7395; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7396  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7397  ret <16 x i8> %res
7398}
7399
7400define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
7401; X86-LABEL: test_mask_adds_epu8_rrk_128:
7402; X86:       # %bb.0:
7403; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7404; X86-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
7405; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7406; X86-NEXT:    retl # encoding: [0xc3]
7407;
7408; X64-LABEL: test_mask_adds_epu8_rrk_128:
7409; X64:       # %bb.0:
7410; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7411; X64-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
7412; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7413; X64-NEXT:    retq # encoding: [0xc3]
7414  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7415  ret <16 x i8> %res
7416}
7417
7418define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
7419; X86-LABEL: test_mask_adds_epu8_rrkz_128:
7420; X86:       # %bb.0:
7421; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7422; X86-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
7423; X86-NEXT:    retl # encoding: [0xc3]
7424;
7425; X64-LABEL: test_mask_adds_epu8_rrkz_128:
7426; X64:       # %bb.0:
7427; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7428; X64-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
7429; X64-NEXT:    retq # encoding: [0xc3]
7430  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7431  ret <16 x i8> %res
7432}
7433
7434define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
7435; X86-LABEL: test_mask_adds_epu8_rm_128:
7436; X86:       # %bb.0:
7437; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7438; X86-NEXT:    vpaddusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x00]
7439; X86-NEXT:    retl # encoding: [0xc3]
7440;
7441; X64-LABEL: test_mask_adds_epu8_rm_128:
7442; X64:       # %bb.0:
7443; X64-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
7444; X64-NEXT:    retq # encoding: [0xc3]
7445  %b = load <16 x i8>, <16 x i8>* %ptr_b
7446  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7447  ret <16 x i8> %res
7448}
7449
7450define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
7451; X86-LABEL: test_mask_adds_epu8_rmk_128:
7452; X86:       # %bb.0:
7453; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7454; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7455; X86-NEXT:    vpaddusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x08]
7456; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7457; X86-NEXT:    retl # encoding: [0xc3]
7458;
7459; X64-LABEL: test_mask_adds_epu8_rmk_128:
7460; X64:       # %bb.0:
7461; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7462; X64-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
7463; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7464; X64-NEXT:    retq # encoding: [0xc3]
7465  %b = load <16 x i8>, <16 x i8>* %ptr_b
7466  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7467  ret <16 x i8> %res
7468}
7469
7470define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
7471; X86-LABEL: test_mask_adds_epu8_rmkz_128:
7472; X86:       # %bb.0:
7473; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7474; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7475; X86-NEXT:    vpaddusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x00]
7476; X86-NEXT:    retl # encoding: [0xc3]
7477;
7478; X64-LABEL: test_mask_adds_epu8_rmkz_128:
7479; X64:       # %bb.0:
7480; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7481; X64-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
7482; X64-NEXT:    retq # encoding: [0xc3]
7483  %b = load <16 x i8>, <16 x i8>* %ptr_b
7484  %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7485  ret <16 x i8> %res
7486}
7487
7488declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7489
7490define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
7491; CHECK-LABEL: test_mask_adds_epu8_rr_256:
7492; CHECK:       # %bb.0:
7493; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
7494; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7495  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7496  ret <32 x i8> %res
7497}
7498
7499define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
7500; X86-LABEL: test_mask_adds_epu8_rrk_256:
7501; X86:       # %bb.0:
7502; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7503; X86-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
7504; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7505; X86-NEXT:    retl # encoding: [0xc3]
7506;
7507; X64-LABEL: test_mask_adds_epu8_rrk_256:
7508; X64:       # %bb.0:
7509; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7510; X64-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
7511; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7512; X64-NEXT:    retq # encoding: [0xc3]
7513  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7514  ret <32 x i8> %res
7515}
7516
7517define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
7518; X86-LABEL: test_mask_adds_epu8_rrkz_256:
7519; X86:       # %bb.0:
7520; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7521; X86-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
7522; X86-NEXT:    retl # encoding: [0xc3]
7523;
7524; X64-LABEL: test_mask_adds_epu8_rrkz_256:
7525; X64:       # %bb.0:
7526; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7527; X64-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
7528; X64-NEXT:    retq # encoding: [0xc3]
7529  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7530  ret <32 x i8> %res
7531}
7532
7533define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
7534; X86-LABEL: test_mask_adds_epu8_rm_256:
7535; X86:       # %bb.0:
7536; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7537; X86-NEXT:    vpaddusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x00]
7538; X86-NEXT:    retl # encoding: [0xc3]
7539;
7540; X64-LABEL: test_mask_adds_epu8_rm_256:
7541; X64:       # %bb.0:
7542; X64-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
7543; X64-NEXT:    retq # encoding: [0xc3]
7544  %b = load <32 x i8>, <32 x i8>* %ptr_b
7545  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7546  ret <32 x i8> %res
7547}
7548
7549define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
7550; X86-LABEL: test_mask_adds_epu8_rmk_256:
7551; X86:       # %bb.0:
7552; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7553; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7554; X86-NEXT:    vpaddusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x08]
7555; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7556; X86-NEXT:    retl # encoding: [0xc3]
7557;
7558; X64-LABEL: test_mask_adds_epu8_rmk_256:
7559; X64:       # %bb.0:
7560; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7561; X64-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
7562; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7563; X64-NEXT:    retq # encoding: [0xc3]
7564  %b = load <32 x i8>, <32 x i8>* %ptr_b
7565  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7566  ret <32 x i8> %res
7567}
7568
7569define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
7570; X86-LABEL: test_mask_adds_epu8_rmkz_256:
7571; X86:       # %bb.0:
7572; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7573; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7574; X86-NEXT:    vpaddusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x00]
7575; X86-NEXT:    retl # encoding: [0xc3]
7576;
7577; X64-LABEL: test_mask_adds_epu8_rmkz_256:
7578; X64:       # %bb.0:
7579; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7580; X64-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
7581; X64-NEXT:    retq # encoding: [0xc3]
7582  %b = load <32 x i8>, <32 x i8>* %ptr_b
7583  %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7584  ret <32 x i8> %res
7585}
7586
7587declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
7588
7589define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
7590; CHECK-LABEL: test_mask_subs_epu8_rr_128:
7591; CHECK:       # %bb.0:
7592; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
7593; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7594  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7595  ret <16 x i8> %res
7596}
7597
7598define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
7599; X86-LABEL: test_mask_subs_epu8_rrk_128:
7600; X86:       # %bb.0:
7601; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7602; X86-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
7603; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7604; X86-NEXT:    retl # encoding: [0xc3]
7605;
7606; X64-LABEL: test_mask_subs_epu8_rrk_128:
7607; X64:       # %bb.0:
7608; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7609; X64-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
7610; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7611; X64-NEXT:    retq # encoding: [0xc3]
7612  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7613  ret <16 x i8> %res
7614}
7615
7616define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
7617; X86-LABEL: test_mask_subs_epu8_rrkz_128:
7618; X86:       # %bb.0:
7619; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7620; X86-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
7621; X86-NEXT:    retl # encoding: [0xc3]
7622;
7623; X64-LABEL: test_mask_subs_epu8_rrkz_128:
7624; X64:       # %bb.0:
7625; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7626; X64-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
7627; X64-NEXT:    retq # encoding: [0xc3]
7628  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7629  ret <16 x i8> %res
7630}
7631
7632define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
7633; X86-LABEL: test_mask_subs_epu8_rm_128:
7634; X86:       # %bb.0:
7635; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7636; X86-NEXT:    vpsubusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x00]
7637; X86-NEXT:    retl # encoding: [0xc3]
7638;
7639; X64-LABEL: test_mask_subs_epu8_rm_128:
7640; X64:       # %bb.0:
7641; X64-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
7642; X64-NEXT:    retq # encoding: [0xc3]
7643  %b = load <16 x i8>, <16 x i8>* %ptr_b
7644  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
7645  ret <16 x i8> %res
7646}
7647
7648define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
7649; X86-LABEL: test_mask_subs_epu8_rmk_128:
7650; X86:       # %bb.0:
7651; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7652; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7653; X86-NEXT:    vpsubusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x08]
7654; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7655; X86-NEXT:    retl # encoding: [0xc3]
7656;
7657; X64-LABEL: test_mask_subs_epu8_rmk_128:
7658; X64:       # %bb.0:
7659; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7660; X64-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
7661; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7662; X64-NEXT:    retq # encoding: [0xc3]
7663  %b = load <16 x i8>, <16 x i8>* %ptr_b
7664  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
7665  ret <16 x i8> %res
7666}
7667
7668define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
7669; X86-LABEL: test_mask_subs_epu8_rmkz_128:
7670; X86:       # %bb.0:
7671; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7672; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7673; X86-NEXT:    vpsubusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x00]
7674; X86-NEXT:    retl # encoding: [0xc3]
7675;
7676; X64-LABEL: test_mask_subs_epu8_rmkz_128:
7677; X64:       # %bb.0:
7678; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7679; X64-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
7680; X64-NEXT:    retq # encoding: [0xc3]
7681  %b = load <16 x i8>, <16 x i8>* %ptr_b
7682  %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
7683  ret <16 x i8> %res
7684}
7685
7686declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
7687
7688define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
7689; CHECK-LABEL: test_mask_subs_epu8_rr_256:
7690; CHECK:       # %bb.0:
7691; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
7692; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7693  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7694  ret <32 x i8> %res
7695}
7696
7697define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
7698; X86-LABEL: test_mask_subs_epu8_rrk_256:
7699; X86:       # %bb.0:
7700; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7701; X86-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
7702; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7703; X86-NEXT:    retl # encoding: [0xc3]
7704;
7705; X64-LABEL: test_mask_subs_epu8_rrk_256:
7706; X64:       # %bb.0:
7707; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7708; X64-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
7709; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7710; X64-NEXT:    retq # encoding: [0xc3]
7711  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7712  ret <32 x i8> %res
7713}
7714
7715define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
7716; X86-LABEL: test_mask_subs_epu8_rrkz_256:
7717; X86:       # %bb.0:
7718; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
7719; X86-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
7720; X86-NEXT:    retl # encoding: [0xc3]
7721;
7722; X64-LABEL: test_mask_subs_epu8_rrkz_256:
7723; X64:       # %bb.0:
7724; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7725; X64-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
7726; X64-NEXT:    retq # encoding: [0xc3]
7727  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7728  ret <32 x i8> %res
7729}
7730
7731define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
7732; X86-LABEL: test_mask_subs_epu8_rm_256:
7733; X86:       # %bb.0:
7734; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7735; X86-NEXT:    vpsubusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x00]
7736; X86-NEXT:    retl # encoding: [0xc3]
7737;
7738; X64-LABEL: test_mask_subs_epu8_rm_256:
7739; X64:       # %bb.0:
7740; X64-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
7741; X64-NEXT:    retq # encoding: [0xc3]
7742  %b = load <32 x i8>, <32 x i8>* %ptr_b
7743  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
7744  ret <32 x i8> %res
7745}
7746
7747define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
7748; X86-LABEL: test_mask_subs_epu8_rmk_256:
7749; X86:       # %bb.0:
7750; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7751; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7752; X86-NEXT:    vpsubusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x08]
7753; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7754; X86-NEXT:    retl # encoding: [0xc3]
7755;
7756; X64-LABEL: test_mask_subs_epu8_rmk_256:
7757; X64:       # %bb.0:
7758; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7759; X64-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
7760; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7761; X64-NEXT:    retq # encoding: [0xc3]
7762  %b = load <32 x i8>, <32 x i8>* %ptr_b
7763  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
7764  ret <32 x i8> %res
7765}
7766
7767define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
7768; X86-LABEL: test_mask_subs_epu8_rmkz_256:
7769; X86:       # %bb.0:
7770; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7771; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
7772; X86-NEXT:    vpsubusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x00]
7773; X86-NEXT:    retl # encoding: [0xc3]
7774;
7775; X64-LABEL: test_mask_subs_epu8_rmkz_256:
7776; X64:       # %bb.0:
7777; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7778; X64-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
7779; X64-NEXT:    retq # encoding: [0xc3]
7780  %b = load <32 x i8>, <32 x i8>* %ptr_b
7781  %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
7782  ret <32 x i8> %res
7783}
7784
7785declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
7786
7787define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
7788; CHECK-LABEL: test_mask_adds_epi16_rr_128:
7789; CHECK:       # %bb.0:
7790; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
7791; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7792  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7793  ret <8 x i16> %res
7794}
7795
7796define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7797; X86-LABEL: test_mask_adds_epi16_rrk_128:
7798; X86:       # %bb.0:
7799; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7800; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7801; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
7802; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7803; X86-NEXT:    retl # encoding: [0xc3]
7804;
7805; X64-LABEL: test_mask_adds_epi16_rrk_128:
7806; X64:       # %bb.0:
7807; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7808; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
7809; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7810; X64-NEXT:    retq # encoding: [0xc3]
7811  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7812  ret <8 x i16> %res
7813}
7814
7815define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
7816; X86-LABEL: test_mask_adds_epi16_rrkz_128:
7817; X86:       # %bb.0:
7818; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7819; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7820; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
7821; X86-NEXT:    retl # encoding: [0xc3]
7822;
7823; X64-LABEL: test_mask_adds_epi16_rrkz_128:
7824; X64:       # %bb.0:
7825; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7826; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
7827; X64-NEXT:    retq # encoding: [0xc3]
7828  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7829  ret <8 x i16> %res
7830}
7831
7832define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
7833; X86-LABEL: test_mask_adds_epi16_rm_128:
7834; X86:       # %bb.0:
7835; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7836; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x00]
7837; X86-NEXT:    retl # encoding: [0xc3]
7838;
7839; X64-LABEL: test_mask_adds_epi16_rm_128:
7840; X64:       # %bb.0:
7841; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
7842; X64-NEXT:    retq # encoding: [0xc3]
7843  %b = load <8 x i16>, <8 x i16>* %ptr_b
7844  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
7845  ret <8 x i16> %res
7846}
7847
7848define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
7849; X86-LABEL: test_mask_adds_epi16_rmk_128:
7850; X86:       # %bb.0:
7851; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7852; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7853; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7854; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
7855; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7856; X86-NEXT:    retl # encoding: [0xc3]
7857;
7858; X64-LABEL: test_mask_adds_epi16_rmk_128:
7859; X64:       # %bb.0:
7860; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7861; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
7862; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7863; X64-NEXT:    retq # encoding: [0xc3]
7864  %b = load <8 x i16>, <8 x i16>* %ptr_b
7865  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
7866  ret <8 x i16> %res
7867}
7868
7869define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
7870; X86-LABEL: test_mask_adds_epi16_rmkz_128:
7871; X86:       # %bb.0:
7872; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7873; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
7874; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
7875; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
7876; X86-NEXT:    retl # encoding: [0xc3]
7877;
7878; X64-LABEL: test_mask_adds_epi16_rmkz_128:
7879; X64:       # %bb.0:
7880; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7881; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
7882; X64-NEXT:    retq # encoding: [0xc3]
7883  %b = load <8 x i16>, <8 x i16>* %ptr_b
7884  %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
7885  ret <8 x i16> %res
7886}
7887
7888declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
7889
7890define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
7891; CHECK-LABEL: test_mask_adds_epi16_rr_256:
7892; CHECK:       # %bb.0:
7893; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
7894; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7895  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7896  ret <16 x i16> %res
7897}
7898
7899define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
7900; X86-LABEL: test_mask_adds_epi16_rrk_256:
7901; X86:       # %bb.0:
7902; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7903; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
7904; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7905; X86-NEXT:    retl # encoding: [0xc3]
7906;
7907; X64-LABEL: test_mask_adds_epi16_rrk_256:
7908; X64:       # %bb.0:
7909; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7910; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
7911; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
7912; X64-NEXT:    retq # encoding: [0xc3]
7913  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7914  ret <16 x i16> %res
7915}
7916
7917define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
7918; X86-LABEL: test_mask_adds_epi16_rrkz_256:
7919; X86:       # %bb.0:
7920; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
7921; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
7922; X86-NEXT:    retl # encoding: [0xc3]
7923;
7924; X64-LABEL: test_mask_adds_epi16_rrkz_256:
7925; X64:       # %bb.0:
7926; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
7927; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
7928; X64-NEXT:    retq # encoding: [0xc3]
7929  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7930  ret <16 x i16> %res
7931}
7932
7933define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
7934; X86-LABEL: test_mask_adds_epi16_rm_256:
7935; X86:       # %bb.0:
7936; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7937; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x00]
7938; X86-NEXT:    retl # encoding: [0xc3]
7939;
7940; X64-LABEL: test_mask_adds_epi16_rm_256:
7941; X64:       # %bb.0:
7942; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
7943; X64-NEXT:    retq # encoding: [0xc3]
7944  %b = load <16 x i16>, <16 x i16>* %ptr_b
7945  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
7946  ret <16 x i16> %res
7947}
7948
7949define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
7950; X86-LABEL: test_mask_adds_epi16_rmk_256:
7951; X86:       # %bb.0:
7952; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7953; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7954; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
7955; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7956; X86-NEXT:    retl # encoding: [0xc3]
7957;
7958; X64-LABEL: test_mask_adds_epi16_rmk_256:
7959; X64:       # %bb.0:
7960; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7961; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
7962; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
7963; X64-NEXT:    retq # encoding: [0xc3]
7964  %b = load <16 x i16>, <16 x i16>* %ptr_b
7965  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
7966  ret <16 x i16> %res
7967}
7968
7969define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
7970; X86-LABEL: test_mask_adds_epi16_rmkz_256:
7971; X86:       # %bb.0:
7972; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
7973; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
7974; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
7975; X86-NEXT:    retl # encoding: [0xc3]
7976;
7977; X64-LABEL: test_mask_adds_epi16_rmkz_256:
7978; X64:       # %bb.0:
7979; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
7980; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
7981; X64-NEXT:    retq # encoding: [0xc3]
7982  %b = load <16 x i16>, <16 x i16>* %ptr_b
7983  %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
7984  ret <16 x i16> %res
7985}
7986
7987declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
7988
7989declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0
7990
7991define <8 x i16> @test_test_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
7992; X86-LABEL: test_test_subs_epi16_rrk_128:
7993; X86:       # %bb.0:
7994; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7995; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
7996; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
7997; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
7998; X86-NEXT:    retl # encoding: [0xc3]
7999;
8000; X64-LABEL: test_test_subs_epi16_rrk_128:
8001; X64:       # %bb.0:
8002; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8003; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8004; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8005; X64-NEXT:    retq # encoding: [0xc3]
8006  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8007  %2 = bitcast i8 %mask to <8 x i1>
8008  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8009  ret <8 x i16> %3
8010}
8011
8012define <8 x i16> @test_test_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8013; X86-LABEL: test_test_subs_epi16_rrkz_128:
8014; X86:       # %bb.0:
8015; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8016; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8017; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8018; X86-NEXT:    retl # encoding: [0xc3]
8019;
8020; X64-LABEL: test_test_subs_epi16_rrkz_128:
8021; X64:       # %bb.0:
8022; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8023; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8024; X64-NEXT:    retq # encoding: [0xc3]
8025  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8026  %2 = bitcast i8 %mask to <8 x i1>
8027  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8028  ret <8 x i16> %3
8029}
8030
8031define <8 x i16> @test_test_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
8032; X86-LABEL: test_test_subs_epi16_rmk_128:
8033; X86:       # %bb.0:
8034; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8035; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8036; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8037; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
8038; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8039; X86-NEXT:    retl # encoding: [0xc3]
8040;
8041; X64-LABEL: test_test_subs_epi16_rmk_128:
8042; X64:       # %bb.0:
8043; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8044; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
8045; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8046; X64-NEXT:    retq # encoding: [0xc3]
8047  %b = load <8 x i16>, <8 x i16>* %ptr_b
8048  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8049  %2 = bitcast i8 %mask to <8 x i1>
8050  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8051  ret <8 x i16> %3
8052}
8053
8054define <8 x i16> @test_test_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
8055; X86-LABEL: test_test_subs_epi16_rmkz_128:
8056; X86:       # %bb.0:
8057; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8058; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8059; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8060; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
8061; X86-NEXT:    retl # encoding: [0xc3]
8062;
8063; X64-LABEL: test_test_subs_epi16_rmkz_128:
8064; X64:       # %bb.0:
8065; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8066; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
8067; X64-NEXT:    retq # encoding: [0xc3]
8068  %b = load <8 x i16>, <8 x i16>* %ptr_b
8069  %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
8070  %2 = bitcast i8 %mask to <8 x i1>
8071  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8072  ret <8 x i16> %3
8073}
8074
8075declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0
8076
8077define <16 x i16> @test_test_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8078; X86-LABEL: test_test_subs_epi16_rrk_256:
8079; X86:       # %bb.0:
8080; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8081; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8082; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8083; X86-NEXT:    retl # encoding: [0xc3]
8084;
8085; X64-LABEL: test_test_subs_epi16_rrk_256:
8086; X64:       # %bb.0:
8087; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8088; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8089; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8090; X64-NEXT:    retq # encoding: [0xc3]
8091  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8092  %2 = bitcast i16 %mask to <16 x i1>
8093  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8094  ret <16 x i16> %3
8095}
8096
8097define <16 x i16> @test_test_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8098; X86-LABEL: test_test_subs_epi16_rrkz_256:
8099; X86:       # %bb.0:
8100; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8101; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8102; X86-NEXT:    retl # encoding: [0xc3]
8103;
8104; X64-LABEL: test_test_subs_epi16_rrkz_256:
8105; X64:       # %bb.0:
8106; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8107; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8108; X64-NEXT:    retq # encoding: [0xc3]
8109  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8110  %2 = bitcast i16 %mask to <16 x i1>
8111  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8112  ret <16 x i16> %3
8113}
8114
8115define <16 x i16> @test_test_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
8116; X86-LABEL: test_test_subs_epi16_rmk_256:
8117; X86:       # %bb.0:
8118; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8119; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8120; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
8121; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8122; X86-NEXT:    retl # encoding: [0xc3]
8123;
8124; X64-LABEL: test_test_subs_epi16_rmk_256:
8125; X64:       # %bb.0:
8126; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8127; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
8128; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8129; X64-NEXT:    retq # encoding: [0xc3]
8130  %b = load <16 x i16>, <16 x i16>* %ptr_b
8131  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8132  %2 = bitcast i16 %mask to <16 x i1>
8133  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8134  ret <16 x i16> %3
8135}
8136
8137define <16 x i16> @test_test_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
8138; X86-LABEL: test_test_subs_epi16_rmkz_256:
8139; X86:       # %bb.0:
8140; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8141; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8142; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
8143; X86-NEXT:    retl # encoding: [0xc3]
8144;
8145; X64-LABEL: test_test_subs_epi16_rmkz_256:
8146; X64:       # %bb.0:
8147; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8148; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
8149; X64-NEXT:    retq # encoding: [0xc3]
8150  %b = load <16 x i16>, <16 x i16>* %ptr_b
8151  %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
8152  %2 = bitcast i16 %mask to <16 x i1>
8153  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8154  ret <16 x i16> %3
8155}
8156
8157declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0
8158
8159define <16 x i8> @test_test_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8160; X86-LABEL: test_test_subs_epi8_rrk_128:
8161; X86:       # %bb.0:
8162; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8163; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
8164; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8165; X86-NEXT:    retl # encoding: [0xc3]
8166;
8167; X64-LABEL: test_test_subs_epi8_rrk_128:
8168; X64:       # %bb.0:
8169; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8170; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
8171; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8172; X64-NEXT:    retq # encoding: [0xc3]
8173  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8174  %2 = bitcast i16 %mask to <16 x i1>
8175  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8176  ret <16 x i8> %3
8177}
8178
8179define <16 x i8> @test_test_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8180; X86-LABEL: test_test_subs_epi8_rrkz_128:
8181; X86:       # %bb.0:
8182; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8183; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
8184; X86-NEXT:    retl # encoding: [0xc3]
8185;
8186; X64-LABEL: test_test_subs_epi8_rrkz_128:
8187; X64:       # %bb.0:
8188; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8189; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
8190; X64-NEXT:    retq # encoding: [0xc3]
8191  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8192  %2 = bitcast i16 %mask to <16 x i1>
8193  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8194  ret <16 x i8> %3
8195}
8196
8197define <16 x i8> @test_test_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
8198; X86-LABEL: test_test_subs_epi8_rmk_128:
8199; X86:       # %bb.0:
8200; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8201; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8202; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
8203; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8204; X86-NEXT:    retl # encoding: [0xc3]
8205;
8206; X64-LABEL: test_test_subs_epi8_rmk_128:
8207; X64:       # %bb.0:
8208; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8209; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
8210; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8211; X64-NEXT:    retq # encoding: [0xc3]
8212  %b = load <16 x i8>, <16 x i8>* %ptr_b
8213  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8214  %2 = bitcast i16 %mask to <16 x i1>
8215  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8216  ret <16 x i8> %3
8217}
8218
8219define <16 x i8> @test_test_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
8220; X86-LABEL: test_test_subs_epi8_rmkz_128:
8221; X86:       # %bb.0:
8222; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8223; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8224; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
8225; X86-NEXT:    retl # encoding: [0xc3]
8226;
8227; X64-LABEL: test_test_subs_epi8_rmkz_128:
8228; X64:       # %bb.0:
8229; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8230; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
8231; X64-NEXT:    retq # encoding: [0xc3]
8232  %b = load <16 x i8>, <16 x i8>* %ptr_b
8233  %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
8234  %2 = bitcast i16 %mask to <16 x i1>
8235  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8236  ret <16 x i8> %3
8237}
8238
8239declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0
8240
8241define <32 x i8> @test_test_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8242; X86-LABEL: test_test_subs_epi8_rrk_256:
8243; X86:       # %bb.0:
8244; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8245; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
8246; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8247; X86-NEXT:    retl # encoding: [0xc3]
8248;
8249; X64-LABEL: test_test_subs_epi8_rrk_256:
8250; X64:       # %bb.0:
8251; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8252; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
8253; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8254; X64-NEXT:    retq # encoding: [0xc3]
8255  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8256  %2 = bitcast i32 %mask to <32 x i1>
8257  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8258  ret <32 x i8> %3
8259}
8260
8261define <32 x i8> @test_test_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8262; X86-LABEL: test_test_subs_epi8_rrkz_256:
8263; X86:       # %bb.0:
8264; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8265; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
8266; X86-NEXT:    retl # encoding: [0xc3]
8267;
8268; X64-LABEL: test_test_subs_epi8_rrkz_256:
8269; X64:       # %bb.0:
8270; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8271; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
8272; X64-NEXT:    retq # encoding: [0xc3]
8273  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8274  %2 = bitcast i32 %mask to <32 x i1>
8275  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8276  ret <32 x i8> %3
8277}
8278
8279define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
8280; X86-LABEL: test_test_subs_epi8_rmk_256:
8281; X86:       # %bb.0:
8282; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8283; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8284; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
8285; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8286; X86-NEXT:    retl # encoding: [0xc3]
8287;
8288; X64-LABEL: test_test_subs_epi8_rmk_256:
8289; X64:       # %bb.0:
8290; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8291; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
8292; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8293; X64-NEXT:    retq # encoding: [0xc3]
8294  %b = load <32 x i8>, <32 x i8>* %ptr_b
8295  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8296  %2 = bitcast i32 %mask to <32 x i1>
8297  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8298  ret <32 x i8> %3
8299}
8300
8301define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
8302; X86-LABEL: test_test_subs_epi8_rmkz_256:
8303; X86:       # %bb.0:
8304; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8305; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8306; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
8307; X86-NEXT:    retl # encoding: [0xc3]
8308;
8309; X64-LABEL: test_test_subs_epi8_rmkz_256:
8310; X64:       # %bb.0:
8311; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8312; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
8313; X64-NEXT:    retq # encoding: [0xc3]
8314  %b = load <32 x i8>, <32 x i8>* %ptr_b
8315  %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
8316  %2 = bitcast i32 %mask to <32 x i1>
8317  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8318  ret <32 x i8> %3
8319}
8320
8321define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
8322; CHECK-LABEL: test_mask_subs_epi16_rr_128:
8323; CHECK:       # %bb.0:
8324; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
8325; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8326  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
8327  ret <8 x i16> %res
8328}
8329
8330define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8331; X86-LABEL: test_mask_subs_epi16_rrk_128:
8332; X86:       # %bb.0:
8333; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8334; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8335; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8336; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8337; X86-NEXT:    retl # encoding: [0xc3]
8338;
8339; X64-LABEL: test_mask_subs_epi16_rrk_128:
8340; X64:       # %bb.0:
8341; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8342; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
8343; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8344; X64-NEXT:    retq # encoding: [0xc3]
8345  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
8346  ret <8 x i16> %res
8347}
8348
8349define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8350; X86-LABEL: test_mask_subs_epi16_rrkz_128:
8351; X86:       # %bb.0:
8352; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8353; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8354; X86-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8355; X86-NEXT:    retl # encoding: [0xc3]
8356;
8357; X64-LABEL: test_mask_subs_epi16_rrkz_128:
8358; X64:       # %bb.0:
8359; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8360; X64-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
8361; X64-NEXT:    retq # encoding: [0xc3]
8362  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
8363  ret <8 x i16> %res
8364}
8365
8366define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
8367; X86-LABEL: test_mask_subs_epi16_rm_128:
8368; X86:       # %bb.0:
8369; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8370; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x00]
8371; X86-NEXT:    retl # encoding: [0xc3]
8372;
8373; X64-LABEL: test_mask_subs_epi16_rm_128:
8374; X64:       # %bb.0:
8375; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
8376; X64-NEXT:    retq # encoding: [0xc3]
8377  %b = load <8 x i16>, <8 x i16>* %ptr_b
8378  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
8379  ret <8 x i16> %res
8380}
8381
8382define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
8383; X86-LABEL: test_mask_subs_epi16_rmk_128:
8384; X86:       # %bb.0:
8385; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8386; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8387; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8388; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
8389; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8390; X86-NEXT:    retl # encoding: [0xc3]
8391;
8392; X64-LABEL: test_mask_subs_epi16_rmk_128:
8393; X64:       # %bb.0:
8394; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8395; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
8396; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8397; X64-NEXT:    retq # encoding: [0xc3]
8398  %b = load <8 x i16>, <8 x i16>* %ptr_b
8399  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
8400  ret <8 x i16> %res
8401}
8402
8403define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
8404; X86-LABEL: test_mask_subs_epi16_rmkz_128:
8405; X86:       # %bb.0:
8406; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8407; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8408; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8409; X86-NEXT:    vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
8410; X86-NEXT:    retl # encoding: [0xc3]
8411;
8412; X64-LABEL: test_mask_subs_epi16_rmkz_128:
8413; X64:       # %bb.0:
8414; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8415; X64-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
8416; X64-NEXT:    retq # encoding: [0xc3]
8417  %b = load <8 x i16>, <8 x i16>* %ptr_b
8418  %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
8419  ret <8 x i16> %res
8420}
8421
8422declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
8423
8424define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
8425; CHECK-LABEL: test_mask_subs_epi16_rr_256:
8426; CHECK:       # %bb.0:
8427; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
8428; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8429  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
8430  ret <16 x i16> %res
8431}
8432
8433define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8434; X86-LABEL: test_mask_subs_epi16_rrk_256:
8435; X86:       # %bb.0:
8436; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8437; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8438; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8439; X86-NEXT:    retl # encoding: [0xc3]
8440;
8441; X64-LABEL: test_mask_subs_epi16_rrk_256:
8442; X64:       # %bb.0:
8443; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8444; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
8445; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8446; X64-NEXT:    retq # encoding: [0xc3]
8447  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
8448  ret <16 x i16> %res
8449}
8450
8451define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8452; X86-LABEL: test_mask_subs_epi16_rrkz_256:
8453; X86:       # %bb.0:
8454; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8455; X86-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8456; X86-NEXT:    retl # encoding: [0xc3]
8457;
8458; X64-LABEL: test_mask_subs_epi16_rrkz_256:
8459; X64:       # %bb.0:
8460; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8461; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
8462; X64-NEXT:    retq # encoding: [0xc3]
8463  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
8464  ret <16 x i16> %res
8465}
8466
8467define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
8468; X86-LABEL: test_mask_subs_epi16_rm_256:
8469; X86:       # %bb.0:
8470; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8471; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x00]
8472; X86-NEXT:    retl # encoding: [0xc3]
8473;
8474; X64-LABEL: test_mask_subs_epi16_rm_256:
8475; X64:       # %bb.0:
8476; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
8477; X64-NEXT:    retq # encoding: [0xc3]
8478  %b = load <16 x i16>, <16 x i16>* %ptr_b
8479  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
8480  ret <16 x i16> %res
8481}
8482
8483define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
8484; X86-LABEL: test_mask_subs_epi16_rmk_256:
8485; X86:       # %bb.0:
8486; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8487; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8488; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
8489; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8490; X86-NEXT:    retl # encoding: [0xc3]
8491;
8492; X64-LABEL: test_mask_subs_epi16_rmk_256:
8493; X64:       # %bb.0:
8494; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8495; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
8496; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8497; X64-NEXT:    retq # encoding: [0xc3]
8498  %b = load <16 x i16>, <16 x i16>* %ptr_b
8499  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
8500  ret <16 x i16> %res
8501}
8502
8503define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
8504; X86-LABEL: test_mask_subs_epi16_rmkz_256:
8505; X86:       # %bb.0:
8506; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8507; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8508; X86-NEXT:    vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
8509; X86-NEXT:    retl # encoding: [0xc3]
8510;
8511; X64-LABEL: test_mask_subs_epi16_rmkz_256:
8512; X64:       # %bb.0:
8513; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8514; X64-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
8515; X64-NEXT:    retq # encoding: [0xc3]
8516  %b = load <16 x i16>, <16 x i16>* %ptr_b
8517  %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
8518  ret <16 x i16> %res
8519}
8520
8521declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
8522
8523declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0
8524
8525define <8 x i16> @test_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
8526; X86-LABEL: test_adds_epi16_rrk_128:
8527; X86:       # %bb.0:
8528; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8529; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8530; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
8531; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8532; X86-NEXT:    retl # encoding: [0xc3]
8533;
8534; X64-LABEL: test_adds_epi16_rrk_128:
8535; X64:       # %bb.0:
8536; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8537; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
8538; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8539; X64-NEXT:    retq # encoding: [0xc3]
8540  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8541  %2 = bitcast i8 %mask to <8 x i1>
8542  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8543  ret <8 x i16> %3
8544}
8545
8546define <8 x i16> @test_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
8547; X86-LABEL: test_adds_epi16_rrkz_128:
8548; X86:       # %bb.0:
8549; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8550; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
8551; X86-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
8552; X86-NEXT:    retl # encoding: [0xc3]
8553;
8554; X64-LABEL: test_adds_epi16_rrkz_128:
8555; X64:       # %bb.0:
8556; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8557; X64-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
8558; X64-NEXT:    retq # encoding: [0xc3]
8559  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8560  %2 = bitcast i8 %mask to <8 x i1>
8561  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8562  ret <8 x i16> %3
8563}
8564
8565define <8 x i16> @test_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
8566; X86-LABEL: test_adds_epi16_rmk_128:
8567; X86:       # %bb.0:
8568; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8569; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8570; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8571; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
8572; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8573; X86-NEXT:    retl # encoding: [0xc3]
8574;
8575; X64-LABEL: test_adds_epi16_rmk_128:
8576; X64:       # %bb.0:
8577; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8578; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
8579; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8580; X64-NEXT:    retq # encoding: [0xc3]
8581  %b = load <8 x i16>, <8 x i16>* %ptr_b
8582  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8583  %2 = bitcast i8 %mask to <8 x i1>
8584  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
8585  ret <8 x i16> %3
8586}
8587
8588define <8 x i16> @test_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
8589; X86-LABEL: test_adds_epi16_rmkz_128:
8590; X86:       # %bb.0:
8591; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8592; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
8593; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
8594; X86-NEXT:    vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
8595; X86-NEXT:    retl # encoding: [0xc3]
8596;
8597; X64-LABEL: test_adds_epi16_rmkz_128:
8598; X64:       # %bb.0:
8599; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8600; X64-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
8601; X64-NEXT:    retq # encoding: [0xc3]
8602  %b = load <8 x i16>, <8 x i16>* %ptr_b
8603  %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
8604  %2 = bitcast i8 %mask to <8 x i1>
8605  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
8606  ret <8 x i16> %3
8607}
8608
8609declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0
8610
8611define <16 x i16> @test_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
8612; X86-LABEL: test_adds_epi16_rrk_256:
8613; X86:       # %bb.0:
8614; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8615; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
8616; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8617; X86-NEXT:    retl # encoding: [0xc3]
8618;
8619; X64-LABEL: test_adds_epi16_rrk_256:
8620; X64:       # %bb.0:
8621; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8622; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
8623; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8624; X64-NEXT:    retq # encoding: [0xc3]
8625  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8626  %2 = bitcast i16 %mask to <16 x i1>
8627  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8628  ret <16 x i16> %3
8629}
8630
8631define <16 x i16> @test_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
8632; X86-LABEL: test_adds_epi16_rrkz_256:
8633; X86:       # %bb.0:
8634; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8635; X86-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
8636; X86-NEXT:    retl # encoding: [0xc3]
8637;
8638; X64-LABEL: test_adds_epi16_rrkz_256:
8639; X64:       # %bb.0:
8640; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8641; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
8642; X64-NEXT:    retq # encoding: [0xc3]
8643  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8644  %2 = bitcast i16 %mask to <16 x i1>
8645  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8646  ret <16 x i16> %3
8647}
8648
8649define <16 x i16> @test_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
8650; X86-LABEL: test_adds_epi16_rmk_256:
8651; X86:       # %bb.0:
8652; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8653; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8654; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08]
8655; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8656; X86-NEXT:    retl # encoding: [0xc3]
8657;
8658; X64-LABEL: test_adds_epi16_rmk_256:
8659; X64:       # %bb.0:
8660; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8661; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
8662; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8663; X64-NEXT:    retq # encoding: [0xc3]
8664  %b = load <16 x i16>, <16 x i16>* %ptr_b
8665  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8666  %2 = bitcast i16 %mask to <16 x i1>
8667  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
8668  ret <16 x i16> %3
8669}
8670
8671define <16 x i16> @test_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
8672; X86-LABEL: test_adds_epi16_rmkz_256:
8673; X86:       # %bb.0:
8674; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8675; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8676; X86-NEXT:    vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00]
8677; X86-NEXT:    retl # encoding: [0xc3]
8678;
8679; X64-LABEL: test_adds_epi16_rmkz_256:
8680; X64:       # %bb.0:
8681; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8682; X64-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
8683; X64-NEXT:    retq # encoding: [0xc3]
8684  %b = load <16 x i16>, <16 x i16>* %ptr_b
8685  %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
8686  %2 = bitcast i16 %mask to <16 x i1>
8687  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
8688  ret <16 x i16> %3
8689}
8690
8691declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0
8692
8693define <16 x i8> @test_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8694; X86-LABEL: test_adds_epi8_rrk_128:
8695; X86:       # %bb.0:
8696; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8697; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8698; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8699; X86-NEXT:    retl # encoding: [0xc3]
8700;
8701; X64-LABEL: test_adds_epi8_rrk_128:
8702; X64:       # %bb.0:
8703; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8704; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8705; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8706; X64-NEXT:    retq # encoding: [0xc3]
8707  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8708  %2 = bitcast i16 %mask to <16 x i1>
8709  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8710  ret <16 x i8> %3
8711}
8712
8713define <16 x i8> @test_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8714; X86-LABEL: test_adds_epi8_rrkz_128:
8715; X86:       # %bb.0:
8716; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8717; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8718; X86-NEXT:    retl # encoding: [0xc3]
8719;
8720; X64-LABEL: test_adds_epi8_rrkz_128:
8721; X64:       # %bb.0:
8722; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8723; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8724; X64-NEXT:    retq # encoding: [0xc3]
8725  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8726  %2 = bitcast i16 %mask to <16 x i1>
8727  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8728  ret <16 x i8> %3
8729}
8730
8731define <16 x i8> @test_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
8732; X86-LABEL: test_adds_epi8_rm_128:
8733; X86:       # %bb.0:
8734; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8735; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
8736; X86-NEXT:    retl # encoding: [0xc3]
8737;
8738; X64-LABEL: test_adds_epi8_rm_128:
8739; X64:       # %bb.0:
8740; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
8741; X64-NEXT:    retq # encoding: [0xc3]
8742  %b = load <16 x i8>, <16 x i8>* %ptr_b
8743  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8744  ret <16 x i8> %1
8745}
8746
8747define <16 x i8> @test_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
8748; X86-LABEL: test_adds_epi8_rmk_128:
8749; X86:       # %bb.0:
8750; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8751; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8752; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
8753; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8754; X86-NEXT:    retl # encoding: [0xc3]
8755;
8756; X64-LABEL: test_adds_epi8_rmk_128:
8757; X64:       # %bb.0:
8758; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8759; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
8760; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8761; X64-NEXT:    retq # encoding: [0xc3]
8762  %b = load <16 x i8>, <16 x i8>* %ptr_b
8763  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8764  %2 = bitcast i16 %mask to <16 x i1>
8765  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
8766  ret <16 x i8> %3
8767}
8768
8769define <16 x i8> @test_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
8770; X86-LABEL: test_adds_epi8_rmkz_128:
8771; X86:       # %bb.0:
8772; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8773; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8774; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
8775; X86-NEXT:    retl # encoding: [0xc3]
8776;
8777; X64-LABEL: test_adds_epi8_rmkz_128:
8778; X64:       # %bb.0:
8779; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8780; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
8781; X64-NEXT:    retq # encoding: [0xc3]
8782  %b = load <16 x i8>, <16 x i8>* %ptr_b
8783  %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)
8784  %2 = bitcast i16 %mask to <16 x i1>
8785  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
8786  ret <16 x i8> %3
8787}
8788
8789declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0
8790
8791define <32 x i8> @test_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8792; X86-LABEL: test_adds_epi8_rrk_256:
8793; X86:       # %bb.0:
8794; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8795; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8796; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8797; X86-NEXT:    retl # encoding: [0xc3]
8798;
8799; X64-LABEL: test_adds_epi8_rrk_256:
8800; X64:       # %bb.0:
8801; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8802; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8803; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8804; X64-NEXT:    retq # encoding: [0xc3]
8805  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8806  %2 = bitcast i32 %mask to <32 x i1>
8807  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8808  ret <32 x i8> %3
8809}
8810
8811define <32 x i8> @test_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8812; X86-LABEL: test_adds_epi8_rrkz_256:
8813; X86:       # %bb.0:
8814; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8815; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
8816; X86-NEXT:    retl # encoding: [0xc3]
8817;
8818; X64-LABEL: test_adds_epi8_rrkz_256:
8819; X64:       # %bb.0:
8820; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8821; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
8822; X64-NEXT:    retq # encoding: [0xc3]
8823  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8824  %2 = bitcast i32 %mask to <32 x i1>
8825  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8826  ret <32 x i8> %3
8827}
8828
8829define <32 x i8> @test_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
8830; X86-LABEL: test_adds_epi8_rmk_256:
8831; X86:       # %bb.0:
8832; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8833; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8834; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
8835; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8836; X86-NEXT:    retl # encoding: [0xc3]
8837;
8838; X64-LABEL: test_adds_epi8_rmk_256:
8839; X64:       # %bb.0:
8840; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8841; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
8842; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8843; X64-NEXT:    retq # encoding: [0xc3]
8844  %b = load <32 x i8>, <32 x i8>* %ptr_b
8845  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8846  %2 = bitcast i32 %mask to <32 x i1>
8847  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
8848  ret <32 x i8> %3
8849}
8850
8851define <32 x i8> @test_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
8852; X86-LABEL: test_adds_epi8_rmkz_256:
8853; X86:       # %bb.0:
8854; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8855; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
8856; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
8857; X86-NEXT:    retl # encoding: [0xc3]
8858;
8859; X64-LABEL: test_adds_epi8_rmkz_256:
8860; X64:       # %bb.0:
8861; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8862; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
8863; X64-NEXT:    retq # encoding: [0xc3]
8864  %b = load <32 x i8>, <32 x i8>* %ptr_b
8865  %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)
8866  %2 = bitcast i32 %mask to <32 x i1>
8867  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
8868  ret <32 x i8> %3
8869}
8870
8871define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
8872; CHECK-LABEL: test_mask_adds_epi8_rr_128:
8873; CHECK:       # %bb.0:
8874; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
8875; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8876  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
8877  ret <16 x i8> %res
8878}
8879
8880define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
8881; X86-LABEL: test_mask_adds_epi8_rrk_128:
8882; X86:       # %bb.0:
8883; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8884; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8885; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8886; X86-NEXT:    retl # encoding: [0xc3]
8887;
8888; X64-LABEL: test_mask_adds_epi8_rrk_128:
8889; X64:       # %bb.0:
8890; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8891; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
8892; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
8893; X64-NEXT:    retq # encoding: [0xc3]
8894  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
8895  ret <16 x i8> %res
8896}
8897
8898define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
8899; X86-LABEL: test_mask_adds_epi8_rrkz_128:
8900; X86:       # %bb.0:
8901; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
8902; X86-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8903; X86-NEXT:    retl # encoding: [0xc3]
8904;
8905; X64-LABEL: test_mask_adds_epi8_rrkz_128:
8906; X64:       # %bb.0:
8907; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8908; X64-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
8909; X64-NEXT:    retq # encoding: [0xc3]
8910  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
8911  ret <16 x i8> %res
8912}
8913
8914define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
8915; X86-LABEL: test_mask_adds_epi8_rm_128:
8916; X86:       # %bb.0:
8917; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8918; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00]
8919; X86-NEXT:    retl # encoding: [0xc3]
8920;
8921; X64-LABEL: test_mask_adds_epi8_rm_128:
8922; X64:       # %bb.0:
8923; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
8924; X64-NEXT:    retq # encoding: [0xc3]
8925  %b = load <16 x i8>, <16 x i8>* %ptr_b
8926  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
8927  ret <16 x i8> %res
8928}
8929
8930define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
8931; X86-LABEL: test_mask_adds_epi8_rmk_128:
8932; X86:       # %bb.0:
8933; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8934; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8935; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08]
8936; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8937; X86-NEXT:    retl # encoding: [0xc3]
8938;
8939; X64-LABEL: test_mask_adds_epi8_rmk_128:
8940; X64:       # %bb.0:
8941; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8942; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
8943; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8944; X64-NEXT:    retq # encoding: [0xc3]
8945  %b = load <16 x i8>, <16 x i8>* %ptr_b
8946  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
8947  ret <16 x i8> %res
8948}
8949
8950define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
8951; X86-LABEL: test_mask_adds_epi8_rmkz_128:
8952; X86:       # %bb.0:
8953; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
8954; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
8955; X86-NEXT:    vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00]
8956; X86-NEXT:    retl # encoding: [0xc3]
8957;
8958; X64-LABEL: test_mask_adds_epi8_rmkz_128:
8959; X64:       # %bb.0:
8960; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
8961; X64-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
8962; X64-NEXT:    retq # encoding: [0xc3]
8963  %b = load <16 x i8>, <16 x i8>* %ptr_b
8964  %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
8965  ret <16 x i8> %res
8966}
8967
8968declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
8969
8970define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
8971; CHECK-LABEL: test_mask_adds_epi8_rr_256:
8972; CHECK:       # %bb.0:
8973; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
8974; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8975  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
8976  ret <32 x i8> %res
8977}
8978
8979define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
8980; X86-LABEL: test_mask_adds_epi8_rrk_256:
8981; X86:       # %bb.0:
8982; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
8983; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8984; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8985; X86-NEXT:    retl # encoding: [0xc3]
8986;
8987; X64-LABEL: test_mask_adds_epi8_rrk_256:
8988; X64:       # %bb.0:
8989; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
8990; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
8991; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8992; X64-NEXT:    retq # encoding: [0xc3]
8993  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
8994  ret <32 x i8> %res
8995}
8996
8997define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
8998; X86-LABEL: test_mask_adds_epi8_rrkz_256:
8999; X86:       # %bb.0:
9000; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9001; X86-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
9002; X86-NEXT:    retl # encoding: [0xc3]
9003;
9004; X64-LABEL: test_mask_adds_epi8_rrkz_256:
9005; X64:       # %bb.0:
9006; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9007; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
9008; X64-NEXT:    retq # encoding: [0xc3]
9009  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9010  ret <32 x i8> %res
9011}
9012
9013define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
9014; X86-LABEL: test_mask_adds_epi8_rm_256:
9015; X86:       # %bb.0:
9016; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9017; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x00]
9018; X86-NEXT:    retl # encoding: [0xc3]
9019;
9020; X64-LABEL: test_mask_adds_epi8_rm_256:
9021; X64:       # %bb.0:
9022; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
9023; X64-NEXT:    retq # encoding: [0xc3]
9024  %b = load <32 x i8>, <32 x i8>* %ptr_b
9025  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9026  ret <32 x i8> %res
9027}
9028
9029define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
9030; X86-LABEL: test_mask_adds_epi8_rmk_256:
9031; X86:       # %bb.0:
9032; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9033; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9034; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08]
9035; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9036; X86-NEXT:    retl # encoding: [0xc3]
9037;
9038; X64-LABEL: test_mask_adds_epi8_rmk_256:
9039; X64:       # %bb.0:
9040; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9041; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
9042; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9043; X64-NEXT:    retq # encoding: [0xc3]
9044  %b = load <32 x i8>, <32 x i8>* %ptr_b
9045  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9046  ret <32 x i8> %res
9047}
9048
9049define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
9050; X86-LABEL: test_mask_adds_epi8_rmkz_256:
9051; X86:       # %bb.0:
9052; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9053; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9054; X86-NEXT:    vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00]
9055; X86-NEXT:    retl # encoding: [0xc3]
9056;
9057; X64-LABEL: test_mask_adds_epi8_rmkz_256:
9058; X64:       # %bb.0:
9059; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9060; X64-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
9061; X64-NEXT:    retq # encoding: [0xc3]
9062  %b = load <32 x i8>, <32 x i8>* %ptr_b
9063  %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9064  ret <32 x i8> %res
9065}
9066
9067declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
9068
9069define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
9070; CHECK-LABEL: test_mask_subs_epi8_rr_128:
9071; CHECK:       # %bb.0:
9072; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
9073; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9074  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
9075  ret <16 x i8> %res
9076}
9077
9078define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
9079; X86-LABEL: test_mask_subs_epi8_rrk_128:
9080; X86:       # %bb.0:
9081; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9082; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
9083; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9084; X86-NEXT:    retl # encoding: [0xc3]
9085;
9086; X64-LABEL: test_mask_subs_epi8_rrk_128:
9087; X64:       # %bb.0:
9088; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9089; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
9090; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9091; X64-NEXT:    retq # encoding: [0xc3]
9092  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
9093  ret <16 x i8> %res
9094}
9095
9096define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
9097; X86-LABEL: test_mask_subs_epi8_rrkz_128:
9098; X86:       # %bb.0:
9099; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9100; X86-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
9101; X86-NEXT:    retl # encoding: [0xc3]
9102;
9103; X64-LABEL: test_mask_subs_epi8_rrkz_128:
9104; X64:       # %bb.0:
9105; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9106; X64-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
9107; X64-NEXT:    retq # encoding: [0xc3]
9108  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
9109  ret <16 x i8> %res
9110}
9111
9112define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
9113; X86-LABEL: test_mask_subs_epi8_rm_128:
9114; X86:       # %bb.0:
9115; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9116; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x00]
9117; X86-NEXT:    retl # encoding: [0xc3]
9118;
9119; X64-LABEL: test_mask_subs_epi8_rm_128:
9120; X64:       # %bb.0:
9121; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
9122; X64-NEXT:    retq # encoding: [0xc3]
9123  %b = load <16 x i8>, <16 x i8>* %ptr_b
9124  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
9125  ret <16 x i8> %res
9126}
9127
9128define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
9129; X86-LABEL: test_mask_subs_epi8_rmk_128:
9130; X86:       # %bb.0:
9131; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9132; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9133; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
9134; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9135; X86-NEXT:    retl # encoding: [0xc3]
9136;
9137; X64-LABEL: test_mask_subs_epi8_rmk_128:
9138; X64:       # %bb.0:
9139; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9140; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
9141; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9142; X64-NEXT:    retq # encoding: [0xc3]
9143  %b = load <16 x i8>, <16 x i8>* %ptr_b
9144  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
9145  ret <16 x i8> %res
9146}
9147
9148define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
9149; X86-LABEL: test_mask_subs_epi8_rmkz_128:
9150; X86:       # %bb.0:
9151; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9152; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
9153; X86-NEXT:    vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
9154; X86-NEXT:    retl # encoding: [0xc3]
9155;
9156; X64-LABEL: test_mask_subs_epi8_rmkz_128:
9157; X64:       # %bb.0:
9158; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9159; X64-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
9160; X64-NEXT:    retq # encoding: [0xc3]
9161  %b = load <16 x i8>, <16 x i8>* %ptr_b
9162  %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
9163  ret <16 x i8> %res
9164}
9165
9166declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
9167
9168define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
9169; CHECK-LABEL: test_mask_subs_epi8_rr_256:
9170; CHECK:       # %bb.0:
9171; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
9172; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9173  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9174  ret <32 x i8> %res
9175}
9176
9177define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
9178; X86-LABEL: test_mask_subs_epi8_rrk_256:
9179; X86:       # %bb.0:
9180; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9181; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
9182; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9183; X86-NEXT:    retl # encoding: [0xc3]
9184;
9185; X64-LABEL: test_mask_subs_epi8_rrk_256:
9186; X64:       # %bb.0:
9187; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9188; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
9189; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9190; X64-NEXT:    retq # encoding: [0xc3]
9191  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9192  ret <32 x i8> %res
9193}
9194
9195define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
9196; X86-LABEL: test_mask_subs_epi8_rrkz_256:
9197; X86:       # %bb.0:
9198; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
9199; X86-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
9200; X86-NEXT:    retl # encoding: [0xc3]
9201;
9202; X64-LABEL: test_mask_subs_epi8_rrkz_256:
9203; X64:       # %bb.0:
9204; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9205; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
9206; X64-NEXT:    retq # encoding: [0xc3]
9207  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9208  ret <32 x i8> %res
9209}
9210
9211define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
9212; X86-LABEL: test_mask_subs_epi8_rm_256:
9213; X86:       # %bb.0:
9214; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9215; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x00]
9216; X86-NEXT:    retl # encoding: [0xc3]
9217;
9218; X64-LABEL: test_mask_subs_epi8_rm_256:
9219; X64:       # %bb.0:
9220; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
9221; X64-NEXT:    retq # encoding: [0xc3]
9222  %b = load <32 x i8>, <32 x i8>* %ptr_b
9223  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
9224  ret <32 x i8> %res
9225}
9226
9227define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
9228; X86-LABEL: test_mask_subs_epi8_rmk_256:
9229; X86:       # %bb.0:
9230; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9231; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9232; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
9233; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9234; X86-NEXT:    retl # encoding: [0xc3]
9235;
9236; X64-LABEL: test_mask_subs_epi8_rmk_256:
9237; X64:       # %bb.0:
9238; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9239; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
9240; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9241; X64-NEXT:    retq # encoding: [0xc3]
9242  %b = load <32 x i8>, <32 x i8>* %ptr_b
9243  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
9244  ret <32 x i8> %res
9245}
9246
9247define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
9248; X86-LABEL: test_mask_subs_epi8_rmkz_256:
9249; X86:       # %bb.0:
9250; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9251; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
9252; X86-NEXT:    vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
9253; X86-NEXT:    retl # encoding: [0xc3]
9254;
9255; X64-LABEL: test_mask_subs_epi8_rmkz_256:
9256; X64:       # %bb.0:
9257; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
9258; X64-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
9259; X64-NEXT:    retq # encoding: [0xc3]
9260  %b = load <32 x i8>, <32 x i8>* %ptr_b
9261  %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
9262  ret <32 x i8> %res
9263}
9264
9265declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
9266
9267declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9268
9269define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9270; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
9271; CHECK:       # %bb.0:
9272; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
9273; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9274  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9275  ret <16 x i16> %res
9276}
9277
9278define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9279; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
9280; X86:       # %bb.0:
9281; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9282; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
9283; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9284; X86-NEXT:    retl # encoding: [0xc3]
9285;
9286; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
9287; X64:       # %bb.0:
9288; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9289; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
9290; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9291; X64-NEXT:    retq # encoding: [0xc3]
9292  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9293  ret <16 x i16> %res
9294}
9295
9296define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9297; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
9298; X86:       # %bb.0:
9299; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9300; X86-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
9301; X86-NEXT:    retl # encoding: [0xc3]
9302;
9303; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
9304; X64:       # %bb.0:
9305; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9306; X64-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
9307; X64-NEXT:    retq # encoding: [0xc3]
9308  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9309  ret <16 x i16> %res
9310}
9311
9312declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9313
9314define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9315; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
9316; CHECK:       # %bb.0:
9317; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
9318; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9319  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9320  ret <8 x i16> %res
9321}
9322
9323define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9324; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
9325; X86:       # %bb.0:
9326; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9327; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9328; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
9329; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9330; X86-NEXT:    retl # encoding: [0xc3]
9331;
9332; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
9333; X64:       # %bb.0:
9334; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9335; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
9336; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9337; X64-NEXT:    retq # encoding: [0xc3]
9338  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9339  ret <8 x i16> %res
9340}
9341
9342define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9343; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
9344; X86:       # %bb.0:
9345; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9346; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9347; X86-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
9348; X86-NEXT:    retl # encoding: [0xc3]
9349;
9350; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
9351; X64:       # %bb.0:
9352; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9353; X64-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
9354; X64-NEXT:    retq # encoding: [0xc3]
9355  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9356  ret <8 x i16> %res
9357}
9358
9359declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9360
9361define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9362; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
9363; CHECK:       # %bb.0:
9364; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
9365; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9366  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9367  ret <16 x i16> %res
9368}
9369
9370define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9371; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
9372; X86:       # %bb.0:
9373; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9374; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
9375; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9376; X86-NEXT:    retl # encoding: [0xc3]
9377;
9378; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
9379; X64:       # %bb.0:
9380; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9381; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
9382; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9383; X64-NEXT:    retq # encoding: [0xc3]
9384  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9385  ret <16 x i16> %res
9386}
9387
9388define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9389; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
9390; X86:       # %bb.0:
9391; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9392; X86-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
9393; X86-NEXT:    retl # encoding: [0xc3]
9394;
9395; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
9396; X64:       # %bb.0:
9397; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9398; X64-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
9399; X64-NEXT:    retq # encoding: [0xc3]
9400  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9401  ret <16 x i16> %res
9402}
9403
9404declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9405
9406define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9407; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
9408; CHECK:       # %bb.0:
9409; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
9410; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9411  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9412  ret <8 x i16> %res
9413}
9414
9415define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9416; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
9417; X86:       # %bb.0:
9418; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9419; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9420; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
9421; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9422; X86-NEXT:    retl # encoding: [0xc3]
9423;
9424; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
9425; X64:       # %bb.0:
9426; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9427; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
9428; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9429; X64-NEXT:    retq # encoding: [0xc3]
9430  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9431  ret <8 x i16> %res
9432}
9433
9434define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9435; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
9436; X86:       # %bb.0:
9437; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9438; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9439; X86-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
9440; X86-NEXT:    retl # encoding: [0xc3]
9441;
9442; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
9443; X64:       # %bb.0:
9444; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9445; X64-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
9446; X64-NEXT:    retq # encoding: [0xc3]
9447  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9448  ret <8 x i16> %res
9449}
9450
9451declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
9452
9453define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
9454; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
9455; CHECK:       # %bb.0:
9456; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
9457; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9458  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
9459  ret <16 x i16> %res
9460}
9461
9462define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
9463; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
9464; X86:       # %bb.0:
9465; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9466; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
9467; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9468; X86-NEXT:    retl # encoding: [0xc3]
9469;
9470; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
9471; X64:       # %bb.0:
9472; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9473; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
9474; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9475; X64-NEXT:    retq # encoding: [0xc3]
9476  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
9477  ret <16 x i16> %res
9478}
9479
9480define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
9481; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
9482; X86:       # %bb.0:
9483; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9484; X86-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
9485; X86-NEXT:    retl # encoding: [0xc3]
9486;
9487; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
9488; X64:       # %bb.0:
9489; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9490; X64-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
9491; X64-NEXT:    retq # encoding: [0xc3]
9492  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
9493  ret <16 x i16> %res
9494}
9495
9496declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
9497
9498define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
9499; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
9500; CHECK:       # %bb.0:
9501; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
9502; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9503  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
9504  ret <8 x i16> %res
9505}
9506
9507define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
9508; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
9509; X86:       # %bb.0:
9510; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9511; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9512; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
9513; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9514; X86-NEXT:    retl # encoding: [0xc3]
9515;
9516; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
9517; X64:       # %bb.0:
9518; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9519; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
9520; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9521; X64-NEXT:    retq # encoding: [0xc3]
9522  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
9523  ret <8 x i16> %res
9524}
9525
9526define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
9527; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
9528; X86:       # %bb.0:
9529; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9530; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
9531; X86-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
9532; X86-NEXT:    retl # encoding: [0xc3]
9533;
9534; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
9535; X64:       # %bb.0:
9536; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9537; X64-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
9538; X64-NEXT:    retq # encoding: [0xc3]
9539  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
9540  ret <8 x i16> %res
9541}
9542
9543declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
9544
9545define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
9546; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
9547; CHECK:       # %bb.0:
9548; CHECK-NEXT:    vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
9549; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9550; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
9551  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
9552  ret <16 x i8> %res
9553}
9554
9555define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
9556; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
9557; X86:       # %bb.0:
9558; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9559; X86-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
9560; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9561; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9562; X86-NEXT:    retl # encoding: [0xc3]
9563;
9564; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
9565; X64:       # %bb.0:
9566; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9567; X64-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
9568; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9569; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9570; X64-NEXT:    retq # encoding: [0xc3]
9571  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
9572  ret <16 x i8> %res
9573}
9574
9575define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
9576; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
9577; X86:       # %bb.0:
9578; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
9579; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
9580; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9581; X86-NEXT:    retl # encoding: [0xc3]
9582;
9583; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
9584; X64:       # %bb.0:
9585; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
9586; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
9587; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
9588; X64-NEXT:    retq # encoding: [0xc3]
9589  %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
9590  ret <16 x i8> %res
9591}
9592