1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
3; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
4
; Calls @llvm.x86.avx512.mask.cmp.b.512 once for each predicate immediate 0-7
; with an all-ones mask (i64 -1) and returns the sum of the eight i64 results.
; The expected assembly below has one unmasked vpcmp*b per call, in the order
; eq, lt, le, unord, neq, nlt, nle, ord. The x86_64 run moves each mask to a
; 64-bit GPR with kmovq; the i386 run stores each mask to the stack with kmovq
; and accumulates it as two 32-bit halves (addl for the low half, then an
; add-with-carry for the high half).
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with the script instead of editing them by hand.
5define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
6; AVX512BW-LABEL: test_cmp_b_512:
7; AVX512BW:       ## BB#0:
8; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
9; AVX512BW-NEXT:    kmovq %k0, %rax
10; AVX512BW-NEXT:    vpcmpltb %zmm1, %zmm0, %k0
11; AVX512BW-NEXT:    kmovq %k0, %rcx
12; AVX512BW-NEXT:    addq %rax, %rcx
13; AVX512BW-NEXT:    vpcmpleb %zmm1, %zmm0, %k0
14; AVX512BW-NEXT:    kmovq %k0, %rax
15; AVX512BW-NEXT:    addq %rcx, %rax
16; AVX512BW-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0
17; AVX512BW-NEXT:    kmovq %k0, %rcx
18; AVX512BW-NEXT:    addq %rax, %rcx
19; AVX512BW-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0
20; AVX512BW-NEXT:    kmovq %k0, %rax
21; AVX512BW-NEXT:    addq %rcx, %rax
22; AVX512BW-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0
23; AVX512BW-NEXT:    kmovq %k0, %rcx
24; AVX512BW-NEXT:    addq %rax, %rcx
25; AVX512BW-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0
26; AVX512BW-NEXT:    kmovq %k0, %rdx
27; AVX512BW-NEXT:    addq %rcx, %rdx
28; AVX512BW-NEXT:    vpcmpordb %zmm1, %zmm0, %k0
29; AVX512BW-NEXT:    kmovq %k0, %rax
30; AVX512BW-NEXT:    addq %rdx, %rax
31; AVX512BW-NEXT:    retq
32;
33; AVX512F-32-LABEL: test_cmp_b_512:
34; AVX512F-32:       # BB#0:
35; AVX512F-32-NEXT:    subl $68, %esp
36; AVX512F-32-NEXT:  .Ltmp0:
37; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
38; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
39; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
40; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
41; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
42; AVX512F-32-NEXT:    vpcmpltb %zmm1, %zmm0, %k0
43; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
44; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
45; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
46; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0
47; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
48; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
49; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
50; AVX512F-32-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0
51; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
52; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
53; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
54; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0
55; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
56; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
57; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
58; AVX512F-32-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0
59; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
60; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
61; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
62; AVX512F-32-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0
63; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
64; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
65; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
66; AVX512F-32-NEXT:    vpcmpordb %zmm1, %zmm0, %k0
67; AVX512F-32-NEXT:    kmovq %k0, (%esp)
68; AVX512F-32-NEXT:    addl (%esp), %eax
69; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
70; AVX512F-32-NEXT:    addl $68, %esp
71; AVX512F-32-NEXT:    retl
72  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
73  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
74  %ret1 = add i64 %res0, %res1
75  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
76  %ret2 = add i64 %ret1, %res2
77  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
78  %ret3 = add i64 %ret2, %res3
79  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
80  %ret4 = add i64 %ret3, %res4
81  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
82  %ret5 = add i64 %ret4, %res5
83  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
84  %ret6 = add i64 %ret5, %res6
85  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
86  %ret7 = add i64 %ret6, %res7
87  ret i64 %ret7
88}
89
; Same shape as the unmasked byte-compare test, but every call to
; @llvm.x86.avx512.mask.cmp.b.512 passes the caller-supplied i64 %mask, so each
; expected vpcmp*b carries a {%k1} write-mask. Predicate immediates 0-7 are
; used in order and the eight i64 results are summed.
; x86_64: the mask arrives in %rdi and is moved to %k1 with a single kmovq.
; i386: the mask is loaded from the stack as two 32-bit halves (two kmovd) and
; combined into %k1 with kunpckdq; results are accumulated through the stack.
; The assertions are autogenerated; regenerate them instead of hand-editing.
90define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
91; AVX512BW-LABEL: test_mask_cmp_b_512:
92; AVX512BW:       ## BB#0:
93; AVX512BW-NEXT:    kmovq %rdi, %k1
94; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
95; AVX512BW-NEXT:    kmovq %k0, %rax
96; AVX512BW-NEXT:    vpcmpltb %zmm1, %zmm0, %k0 {%k1}
97; AVX512BW-NEXT:    kmovq %k0, %rcx
98; AVX512BW-NEXT:    addq %rax, %rcx
99; AVX512BW-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1}
100; AVX512BW-NEXT:    kmovq %k0, %rax
101; AVX512BW-NEXT:    addq %rcx, %rax
102; AVX512BW-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
103; AVX512BW-NEXT:    kmovq %k0, %rcx
104; AVX512BW-NEXT:    addq %rax, %rcx
105; AVX512BW-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
106; AVX512BW-NEXT:    kmovq %k0, %rax
107; AVX512BW-NEXT:    addq %rcx, %rax
108; AVX512BW-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
109; AVX512BW-NEXT:    kmovq %k0, %rcx
110; AVX512BW-NEXT:    addq %rax, %rcx
111; AVX512BW-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
112; AVX512BW-NEXT:    kmovq %k0, %rdx
113; AVX512BW-NEXT:    addq %rcx, %rdx
114; AVX512BW-NEXT:    vpcmpordb %zmm1, %zmm0, %k0 {%k1}
115; AVX512BW-NEXT:    kmovq %k0, %rax
116; AVX512BW-NEXT:    addq %rdx, %rax
117; AVX512BW-NEXT:    retq
118;
119; AVX512F-32-LABEL: test_mask_cmp_b_512:
120; AVX512F-32:       # BB#0:
121; AVX512F-32-NEXT:    subl $68, %esp
122; AVX512F-32-NEXT:  .Ltmp1:
123; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
124; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
125; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
126; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
127; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
128; AVX512F-32-NEXT:    kmovq %k0, (%esp)
129; AVX512F-32-NEXT:    movl (%esp), %eax
130; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
131; AVX512F-32-NEXT:    vpcmpltb %zmm1, %zmm0, %k0 {%k1}
132; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
133; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
134; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
135; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1}
136; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
137; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
138; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
139; AVX512F-32-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
140; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
141; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
142; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
143; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
144; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
145; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
146; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
147; AVX512F-32-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
148; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
149; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
150; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
151; AVX512F-32-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
152; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
153; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
154; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
155; AVX512F-32-NEXT:    vpcmpordb %zmm1, %zmm0, %k0 {%k1}
156; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
157; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
158; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
159; AVX512F-32-NEXT:    addl $68, %esp
160; AVX512F-32-NEXT:    retl
161  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
162  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
163  %ret1 = add i64 %res0, %res1
164  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
165  %ret2 = add i64 %ret1, %res2
166  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
167  %ret3 = add i64 %ret2, %res3
168  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
169  %ret4 = add i64 %ret3, %res4
170  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
171  %ret5 = add i64 %ret4, %res5
172  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
173  %ret6 = add i64 %ret5, %res6
174  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
175  %ret7 = add i64 %ret6, %res7
176  ret i64 %ret7
177}
178
; Intrinsic exercised by the byte-compare tests in this file. Operands, as used
; by the callers here: two <64 x i8> vectors, an i32 predicate immediate (0-7),
; and an i64 write-mask; the result is an i64 bit mask.
179declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
180
; Calls @llvm.x86.avx512.mask.ucmp.b.512 once for each predicate immediate 0-7
; with an all-ones mask (i64 -1) and returns the sum of the eight i64 results.
; The expected assembly uses the vpcmp*ub mnemonics (equb, ltub, leub, unordub,
; nequb, nltub, nleub, ordub), one per call and without a write-mask.
; x86_64 reads each mask with kmovq into a GPR; i386 spills each mask to the
; stack and sums it as two 32-bit halves.
; The assertions are autogenerated; regenerate them instead of hand-editing.
181define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
182; AVX512BW-LABEL: test_ucmp_b_512:
183; AVX512BW:       ## BB#0:
184; AVX512BW-NEXT:    vpcmpequb %zmm1, %zmm0, %k0
185; AVX512BW-NEXT:    kmovq %k0, %rax
186; AVX512BW-NEXT:    vpcmpltub %zmm1, %zmm0, %k0
187; AVX512BW-NEXT:    kmovq %k0, %rcx
188; AVX512BW-NEXT:    addq %rax, %rcx
189; AVX512BW-NEXT:    vpcmpleub %zmm1, %zmm0, %k0
190; AVX512BW-NEXT:    kmovq %k0, %rax
191; AVX512BW-NEXT:    addq %rcx, %rax
192; AVX512BW-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0
193; AVX512BW-NEXT:    kmovq %k0, %rcx
194; AVX512BW-NEXT:    addq %rax, %rcx
195; AVX512BW-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0
196; AVX512BW-NEXT:    kmovq %k0, %rax
197; AVX512BW-NEXT:    addq %rcx, %rax
198; AVX512BW-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0
199; AVX512BW-NEXT:    kmovq %k0, %rcx
200; AVX512BW-NEXT:    addq %rax, %rcx
201; AVX512BW-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
202; AVX512BW-NEXT:    kmovq %k0, %rdx
203; AVX512BW-NEXT:    addq %rcx, %rdx
204; AVX512BW-NEXT:    vpcmpordub %zmm1, %zmm0, %k0
205; AVX512BW-NEXT:    kmovq %k0, %rax
206; AVX512BW-NEXT:    addq %rdx, %rax
207; AVX512BW-NEXT:    retq
208;
209; AVX512F-32-LABEL: test_ucmp_b_512:
210; AVX512F-32:       # BB#0:
211; AVX512F-32-NEXT:    subl $68, %esp
212; AVX512F-32-NEXT:  .Ltmp2:
213; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
214; AVX512F-32-NEXT:    vpcmpequb %zmm1, %zmm0, %k0
215; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
216; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
217; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
218; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0
219; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
220; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
221; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
222; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0
223; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
224; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
225; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
226; AVX512F-32-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0
227; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
228; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
229; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
230; AVX512F-32-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0
231; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
232; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
233; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
234; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0
235; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
236; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
237; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
238; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
239; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
240; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
241; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
242; AVX512F-32-NEXT:    vpcmpordub %zmm1, %zmm0, %k0
243; AVX512F-32-NEXT:    kmovq %k0, (%esp)
244; AVX512F-32-NEXT:    addl (%esp), %eax
245; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
246; AVX512F-32-NEXT:    addl $68, %esp
247; AVX512F-32-NEXT:    retl
248  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
249  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
250  %ret1 = add i64 %res0, %res1
251  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
252  %ret2 = add i64 %ret1, %res2
253  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
254  %ret3 = add i64 %ret2, %res3
255  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
256  %ret4 = add i64 %ret3, %res4
257  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
258  %ret5 = add i64 %ret4, %res5
259  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
260  %ret6 = add i64 %ret5, %res6
261  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
262  %ret7 = add i64 %ret6, %res7
263  ret i64 %ret7
264}
265
; Masked counterpart of the vpcmp*ub test: every call to
; @llvm.x86.avx512.mask.ucmp.b.512 passes the caller-supplied i64 %mask, so each
; expected vpcmp*ub carries a {%k1} write-mask. Predicate immediates 0-7 are
; used in order and the eight i64 results are summed.
; x86_64: %mask is moved from %rdi to %k1 with kmovq.
; i386: %mask is rebuilt in %k1 from two 32-bit stack loads (kmovd x2 followed
; by kunpckdq), and each result is accumulated through the stack.
; The assertions are autogenerated; regenerate them instead of hand-editing.
266define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
267; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
268; AVX512BW:       ## BB#0:
269; AVX512BW-NEXT:    kmovq %rdi, %k1
270; AVX512BW-NEXT:    vpcmpequb %zmm1, %zmm0, %k0 {%k1}
271; AVX512BW-NEXT:    kmovq %k0, %rax
272; AVX512BW-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1}
273; AVX512BW-NEXT:    kmovq %k0, %rcx
274; AVX512BW-NEXT:    addq %rax, %rcx
275; AVX512BW-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1}
276; AVX512BW-NEXT:    kmovq %k0, %rax
277; AVX512BW-NEXT:    addq %rcx, %rax
278; AVX512BW-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
279; AVX512BW-NEXT:    kmovq %k0, %rcx
280; AVX512BW-NEXT:    addq %rax, %rcx
281; AVX512BW-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
282; AVX512BW-NEXT:    kmovq %k0, %rax
283; AVX512BW-NEXT:    addq %rcx, %rax
284; AVX512BW-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
285; AVX512BW-NEXT:    kmovq %k0, %rcx
286; AVX512BW-NEXT:    addq %rax, %rcx
287; AVX512BW-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
288; AVX512BW-NEXT:    kmovq %k0, %rdx
289; AVX512BW-NEXT:    addq %rcx, %rdx
290; AVX512BW-NEXT:    vpcmpordub %zmm1, %zmm0, %k0 {%k1}
291; AVX512BW-NEXT:    kmovq %k0, %rax
292; AVX512BW-NEXT:    addq %rdx, %rax
293; AVX512BW-NEXT:    retq
294;
295; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
296; AVX512F-32:       # BB#0:
297; AVX512F-32-NEXT:    subl $68, %esp
298; AVX512F-32-NEXT:  .Ltmp3:
299; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
300; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
301; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
302; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
303; AVX512F-32-NEXT:    vpcmpequb %zmm1, %zmm0, %k0 {%k1}
304; AVX512F-32-NEXT:    kmovq %k0, (%esp)
305; AVX512F-32-NEXT:    movl (%esp), %eax
306; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
307; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1}
308; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
309; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
310; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
311; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1}
312; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
313; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
314; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
315; AVX512F-32-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
316; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
317; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
318; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
319; AVX512F-32-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
320; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
321; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
322; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
323; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
324; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
325; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
326; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
327; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
328; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
329; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
330; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
331; AVX512F-32-NEXT:    vpcmpordub %zmm1, %zmm0, %k0 {%k1}
332; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
333; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
334; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
335; AVX512F-32-NEXT:    addl $68, %esp
336; AVX512F-32-NEXT:    retl
337  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
338  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
339  %ret1 = add i64 %res0, %res1
340  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
341  %ret2 = add i64 %ret1, %res2
342  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
343  %ret3 = add i64 %ret2, %res3
344  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
345  %ret4 = add i64 %ret3, %res4
346  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
347  %ret5 = add i64 %ret4, %res5
348  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
349  %ret6 = add i64 %ret5, %res6
350  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
351  %ret7 = add i64 %ret6, %res7
352  ret i64 %ret7
353}
354
; Intrinsic exercised by the vpcmp*ub tests in this file. Operands, as used by
; the callers here: two <64 x i8> vectors, an i32 predicate immediate (0-7),
; and an i64 write-mask; the result is an i64 bit mask.
355declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
356
; Calls @llvm.x86.avx512.mask.cmp.w.512 once for each predicate immediate 0-7
; with an all-ones mask (i32 -1) and returns the sum of the eight i32 results.
; The expected assembly has one unmasked vpcmp*w per call, in the order
; eq, lt, le, unord, neq, nlt, nle, ord. Each result mask is read with kmovd
; into a 32-bit GPR, so both run lines expect the same register-only sequence
; and differ only in the return instruction (retq vs. retl).
; The assertions are autogenerated; regenerate them instead of hand-editing.
357define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
358; AVX512BW-LABEL: test_cmp_w_512:
359; AVX512BW:       ## BB#0:
360; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
361; AVX512BW-NEXT:    kmovd %k0, %eax
362; AVX512BW-NEXT:    vpcmpltw %zmm1, %zmm0, %k0
363; AVX512BW-NEXT:    kmovd %k0, %ecx
364; AVX512BW-NEXT:    addl %eax, %ecx
365; AVX512BW-NEXT:    vpcmplew %zmm1, %zmm0, %k0
366; AVX512BW-NEXT:    kmovd %k0, %eax
367; AVX512BW-NEXT:    addl %ecx, %eax
368; AVX512BW-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0
369; AVX512BW-NEXT:    kmovd %k0, %ecx
370; AVX512BW-NEXT:    addl %eax, %ecx
371; AVX512BW-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0
372; AVX512BW-NEXT:    kmovd %k0, %eax
373; AVX512BW-NEXT:    addl %ecx, %eax
374; AVX512BW-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0
375; AVX512BW-NEXT:    kmovd %k0, %ecx
376; AVX512BW-NEXT:    addl %eax, %ecx
377; AVX512BW-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0
378; AVX512BW-NEXT:    kmovd %k0, %edx
379; AVX512BW-NEXT:    addl %ecx, %edx
380; AVX512BW-NEXT:    vpcmpordw %zmm1, %zmm0, %k0
381; AVX512BW-NEXT:    kmovd %k0, %eax
382; AVX512BW-NEXT:    addl %edx, %eax
383; AVX512BW-NEXT:    retq
384;
385; AVX512F-32-LABEL: test_cmp_w_512:
386; AVX512F-32:       # BB#0:
387; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
388; AVX512F-32-NEXT:    kmovd %k0, %eax
389; AVX512F-32-NEXT:    vpcmpltw %zmm1, %zmm0, %k0
390; AVX512F-32-NEXT:    kmovd %k0, %ecx
391; AVX512F-32-NEXT:    addl %eax, %ecx
392; AVX512F-32-NEXT:    vpcmplew %zmm1, %zmm0, %k0
393; AVX512F-32-NEXT:    kmovd %k0, %eax
394; AVX512F-32-NEXT:    addl %ecx, %eax
395; AVX512F-32-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0
396; AVX512F-32-NEXT:    kmovd %k0, %ecx
397; AVX512F-32-NEXT:    addl %eax, %ecx
398; AVX512F-32-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0
399; AVX512F-32-NEXT:    kmovd %k0, %eax
400; AVX512F-32-NEXT:    addl %ecx, %eax
401; AVX512F-32-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0
402; AVX512F-32-NEXT:    kmovd %k0, %ecx
403; AVX512F-32-NEXT:    addl %eax, %ecx
404; AVX512F-32-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0
405; AVX512F-32-NEXT:    kmovd %k0, %edx
406; AVX512F-32-NEXT:    addl %ecx, %edx
407; AVX512F-32-NEXT:    vpcmpordw %zmm1, %zmm0, %k0
408; AVX512F-32-NEXT:    kmovd %k0, %eax
409; AVX512F-32-NEXT:    addl %edx, %eax
410; AVX512F-32-NEXT:    retl
411  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
412  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
413  %ret1 = add i32 %res0, %res1
414  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
415  %ret2 = add i32 %ret1, %res2
416  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
417  %ret3 = add i32 %ret2, %res3
418  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
419  %ret4 = add i32 %ret3, %res4
420  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
421  %ret5 = add i32 %ret4, %res5
422  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
423  %ret6 = add i32 %ret5, %res6
424  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
425  %ret7 = add i32 %ret6, %res7
426  ret i32 %ret7
427}
428
; Masked counterpart of the word-compare test: every call to
; @llvm.x86.avx512.mask.cmp.w.512 passes the caller-supplied i32 %mask, so each
; expected vpcmp*w carries a {%k1} write-mask. Predicate immediates 0-7 are
; used in order and the eight i32 results are summed.
; x86_64 loads %k1 from %edi; i386 loads %k1 from a stack slot. Both use kmovd.
; The assertions are autogenerated; regenerate them instead of hand-editing.
429define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
430; AVX512BW-LABEL: test_mask_cmp_w_512:
431; AVX512BW:       ## BB#0:
432; AVX512BW-NEXT:    kmovd %edi, %k1
433; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
434; AVX512BW-NEXT:    kmovd %k0, %eax
435; AVX512BW-NEXT:    vpcmpltw %zmm1, %zmm0, %k0 {%k1}
436; AVX512BW-NEXT:    kmovd %k0, %ecx
437; AVX512BW-NEXT:    addl %eax, %ecx
438; AVX512BW-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1}
439; AVX512BW-NEXT:    kmovd %k0, %eax
440; AVX512BW-NEXT:    addl %ecx, %eax
441; AVX512BW-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
442; AVX512BW-NEXT:    kmovd %k0, %ecx
443; AVX512BW-NEXT:    addl %eax, %ecx
444; AVX512BW-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
445; AVX512BW-NEXT:    kmovd %k0, %eax
446; AVX512BW-NEXT:    addl %ecx, %eax
447; AVX512BW-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
448; AVX512BW-NEXT:    kmovd %k0, %ecx
449; AVX512BW-NEXT:    addl %eax, %ecx
450; AVX512BW-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
451; AVX512BW-NEXT:    kmovd %k0, %edx
452; AVX512BW-NEXT:    addl %ecx, %edx
453; AVX512BW-NEXT:    vpcmpordw %zmm1, %zmm0, %k0 {%k1}
454; AVX512BW-NEXT:    kmovd %k0, %eax
455; AVX512BW-NEXT:    addl %edx, %eax
456; AVX512BW-NEXT:    retq
457;
458; AVX512F-32-LABEL: test_mask_cmp_w_512:
459; AVX512F-32:       # BB#0:
460; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
461; AVX512F-32-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
462; AVX512F-32-NEXT:    kmovd %k0, %eax
463; AVX512F-32-NEXT:    vpcmpltw %zmm1, %zmm0, %k0 {%k1}
464; AVX512F-32-NEXT:    kmovd %k0, %ecx
465; AVX512F-32-NEXT:    addl %eax, %ecx
466; AVX512F-32-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1}
467; AVX512F-32-NEXT:    kmovd %k0, %eax
468; AVX512F-32-NEXT:    addl %ecx, %eax
469; AVX512F-32-NEXT:    vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
470; AVX512F-32-NEXT:    kmovd %k0, %ecx
471; AVX512F-32-NEXT:    addl %eax, %ecx
472; AVX512F-32-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
473; AVX512F-32-NEXT:    kmovd %k0, %eax
474; AVX512F-32-NEXT:    addl %ecx, %eax
475; AVX512F-32-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
476; AVX512F-32-NEXT:    kmovd %k0, %ecx
477; AVX512F-32-NEXT:    addl %eax, %ecx
478; AVX512F-32-NEXT:    vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
479; AVX512F-32-NEXT:    kmovd %k0, %edx
480; AVX512F-32-NEXT:    addl %ecx, %edx
481; AVX512F-32-NEXT:    vpcmpordw %zmm1, %zmm0, %k0 {%k1}
482; AVX512F-32-NEXT:    kmovd %k0, %eax
483; AVX512F-32-NEXT:    addl %edx, %eax
484; AVX512F-32-NEXT:    retl
485  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
486  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
487  %ret1 = add i32 %res0, %res1
488  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
489  %ret2 = add i32 %ret1, %res2
490  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
491  %ret3 = add i32 %ret2, %res3
492  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
493  %ret4 = add i32 %ret3, %res4
494  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
495  %ret5 = add i32 %ret4, %res5
496  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
497  %ret6 = add i32 %ret5, %res6
498  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
499  %ret7 = add i32 %ret6, %res7
500  ret i32 %ret7
501}
502
; Intrinsic exercised by the word-compare tests in this file. Operands, as used
; by the callers here: two <32 x i16> vectors, an i32 predicate immediate
; (0-7), and an i32 write-mask; the result is an i32 bit mask.
503declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
504
; Calls @llvm.x86.avx512.mask.ucmp.w.512 once for each predicate immediate 0-7
; with an all-ones mask (i32 -1) and returns the sum of the eight i32 results.
; The expected assembly uses the vpcmp*uw mnemonics (equw, ltuw, leuw, unorduw,
; nequw, nltuw, nleuw, orduw), one per call and without a write-mask. Each
; result mask is read with kmovd, so both run lines expect the same
; register-only sequence apart from the return instruction.
; The assertions are autogenerated; regenerate them instead of hand-editing.
505define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
506; AVX512BW-LABEL: test_ucmp_w_512:
507; AVX512BW:       ## BB#0:
508; AVX512BW-NEXT:    vpcmpequw %zmm1, %zmm0, %k0
509; AVX512BW-NEXT:    kmovd %k0, %eax
510; AVX512BW-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0
511; AVX512BW-NEXT:    kmovd %k0, %ecx
512; AVX512BW-NEXT:    addl %eax, %ecx
513; AVX512BW-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0
514; AVX512BW-NEXT:    kmovd %k0, %eax
515; AVX512BW-NEXT:    addl %ecx, %eax
516; AVX512BW-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0
517; AVX512BW-NEXT:    kmovd %k0, %ecx
518; AVX512BW-NEXT:    addl %eax, %ecx
519; AVX512BW-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0
520; AVX512BW-NEXT:    kmovd %k0, %eax
521; AVX512BW-NEXT:    addl %ecx, %eax
522; AVX512BW-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0
523; AVX512BW-NEXT:    kmovd %k0, %ecx
524; AVX512BW-NEXT:    addl %eax, %ecx
525; AVX512BW-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0
526; AVX512BW-NEXT:    kmovd %k0, %edx
527; AVX512BW-NEXT:    addl %ecx, %edx
528; AVX512BW-NEXT:    vpcmporduw %zmm1, %zmm0, %k0
529; AVX512BW-NEXT:    kmovd %k0, %eax
530; AVX512BW-NEXT:    addl %edx, %eax
531; AVX512BW-NEXT:    retq
532;
533; AVX512F-32-LABEL: test_ucmp_w_512:
534; AVX512F-32:       # BB#0:
535; AVX512F-32-NEXT:    vpcmpequw %zmm1, %zmm0, %k0
536; AVX512F-32-NEXT:    kmovd %k0, %eax
537; AVX512F-32-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0
538; AVX512F-32-NEXT:    kmovd %k0, %ecx
539; AVX512F-32-NEXT:    addl %eax, %ecx
540; AVX512F-32-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0
541; AVX512F-32-NEXT:    kmovd %k0, %eax
542; AVX512F-32-NEXT:    addl %ecx, %eax
543; AVX512F-32-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0
544; AVX512F-32-NEXT:    kmovd %k0, %ecx
545; AVX512F-32-NEXT:    addl %eax, %ecx
546; AVX512F-32-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0
547; AVX512F-32-NEXT:    kmovd %k0, %eax
548; AVX512F-32-NEXT:    addl %ecx, %eax
549; AVX512F-32-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0
550; AVX512F-32-NEXT:    kmovd %k0, %ecx
551; AVX512F-32-NEXT:    addl %eax, %ecx
552; AVX512F-32-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0
553; AVX512F-32-NEXT:    kmovd %k0, %edx
554; AVX512F-32-NEXT:    addl %ecx, %edx
555; AVX512F-32-NEXT:    vpcmporduw %zmm1, %zmm0, %k0
556; AVX512F-32-NEXT:    kmovd %k0, %eax
557; AVX512F-32-NEXT:    addl %edx, %eax
558; AVX512F-32-NEXT:    retl
559  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
560  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
561  %ret1 = add i32 %res0, %res1
562  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
563  %ret2 = add i32 %ret1, %res2
564  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
565  %ret3 = add i32 %ret2, %res3
566  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
567  %ret4 = add i32 %ret3, %res4
568  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
569  %ret5 = add i32 %ret4, %res5
570  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
571  %ret6 = add i32 %ret5, %res6
572  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
573  %ret7 = add i32 %ret6, %res7
574  ret i32 %ret7
575}
576
; Masked counterpart of the vpcmp*uw test: every call to
; @llvm.x86.avx512.mask.ucmp.w.512 passes the caller-supplied i32 %mask, so each
; expected vpcmp*uw carries a {%k1} write-mask. Predicate immediates 0-7 are
; used in order and the eight i32 results are summed.
; x86_64 loads %k1 from %edi; i386 loads %k1 from a stack slot. Both use kmovd.
; The assertions are autogenerated; regenerate them instead of hand-editing.
577define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
578; AVX512BW-LABEL: test_mask_ucmp_w_512:
579; AVX512BW:       ## BB#0:
580; AVX512BW-NEXT:    kmovd %edi, %k1
581; AVX512BW-NEXT:    vpcmpequw %zmm1, %zmm0, %k0 {%k1}
582; AVX512BW-NEXT:    kmovd %k0, %eax
583; AVX512BW-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
584; AVX512BW-NEXT:    kmovd %k0, %ecx
585; AVX512BW-NEXT:    addl %eax, %ecx
586; AVX512BW-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
587; AVX512BW-NEXT:    kmovd %k0, %eax
588; AVX512BW-NEXT:    addl %ecx, %eax
589; AVX512BW-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
590; AVX512BW-NEXT:    kmovd %k0, %ecx
591; AVX512BW-NEXT:    addl %eax, %ecx
592; AVX512BW-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
593; AVX512BW-NEXT:    kmovd %k0, %eax
594; AVX512BW-NEXT:    addl %ecx, %eax
595; AVX512BW-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
596; AVX512BW-NEXT:    kmovd %k0, %ecx
597; AVX512BW-NEXT:    addl %eax, %ecx
598; AVX512BW-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
599; AVX512BW-NEXT:    kmovd %k0, %edx
600; AVX512BW-NEXT:    addl %ecx, %edx
601; AVX512BW-NEXT:    vpcmporduw %zmm1, %zmm0, %k0 {%k1}
602; AVX512BW-NEXT:    kmovd %k0, %eax
603; AVX512BW-NEXT:    addl %edx, %eax
604; AVX512BW-NEXT:    retq
605;
606; AVX512F-32-LABEL: test_mask_ucmp_w_512:
607; AVX512F-32:       # BB#0:
608; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
609; AVX512F-32-NEXT:    vpcmpequw %zmm1, %zmm0, %k0 {%k1}
610; AVX512F-32-NEXT:    kmovd %k0, %eax
611; AVX512F-32-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
612; AVX512F-32-NEXT:    kmovd %k0, %ecx
613; AVX512F-32-NEXT:    addl %eax, %ecx
614; AVX512F-32-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
615; AVX512F-32-NEXT:    kmovd %k0, %eax
616; AVX512F-32-NEXT:    addl %ecx, %eax
617; AVX512F-32-NEXT:    vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
618; AVX512F-32-NEXT:    kmovd %k0, %ecx
619; AVX512F-32-NEXT:    addl %eax, %ecx
620; AVX512F-32-NEXT:    vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
621; AVX512F-32-NEXT:    kmovd %k0, %eax
622; AVX512F-32-NEXT:    addl %ecx, %eax
623; AVX512F-32-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
624; AVX512F-32-NEXT:    kmovd %k0, %ecx
625; AVX512F-32-NEXT:    addl %eax, %ecx
626; AVX512F-32-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
627; AVX512F-32-NEXT:    kmovd %k0, %edx
628; AVX512F-32-NEXT:    addl %ecx, %edx
629; AVX512F-32-NEXT:    vpcmporduw %zmm1, %zmm0, %k0 {%k1}
630; AVX512F-32-NEXT:    kmovd %k0, %eax
631; AVX512F-32-NEXT:    addl %edx, %eax
632; AVX512F-32-NEXT:    retl
633  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
634  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
635  %ret1 = add i32 %res0, %res1
636  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
637  %ret2 = add i32 %ret1, %res2
638  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
639  %ret3 = add i32 %ret2, %res3
640  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
641  %ret4 = add i32 %ret3, %res4
642  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
643  %ret5 = add i32 %ret4, %res5
644  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
645  %ret6 = add i32 %ret5, %res6
646  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
647  %ret7 = add i32 %ret6, %res7
648  ret i32 %ret7
649}
650
; Intrinsic exercised by the vpcmp*uw tests in this file. Operands, as used by
; the callers here: two <32 x i16> vectors, an i32 predicate immediate (0-7),
; and an i32 write-mask; the result is an i32 bit mask.
651declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
652
; packssdw.512, register operands, zeroinitializer pass-through, all-ones mask
; (-1): both configurations expect a single unmasked vpackssdw.
653define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
654; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
655; AVX512BW:       ## BB#0:
656; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
657; AVX512BW-NEXT:    retq
658;
659; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
660; AVX512F-32:       # BB#0:
661; AVX512F-32-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
662; AVX512F-32-NEXT:    retl
663  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
664  ret <32 x i16> %res
665}
666
; packssdw.512, register operands, %passThru pass-through under %mask: expects
; the mask moved into %k1 with kmovd, vpackssdw writing %zmm2 {%k1}, then a
; vmovaps copy of %zmm2 into %zmm0.
667define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
668; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
669; AVX512BW:       ## BB#0:
670; AVX512BW-NEXT:    kmovd %edi, %k1
671; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
672; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
673; AVX512BW-NEXT:    retq
674;
675; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
676; AVX512F-32:       # BB#0:
677; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
678; AVX512F-32-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
679; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
680; AVX512F-32-NEXT:    retl
681  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
682  ret <32 x i16> %res
683}
684
; packssdw.512, register operands, zeroinitializer pass-through under %mask:
; expects the zero-masking form of vpackssdw ({%k1} {z}).
685define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
686; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
687; AVX512BW:       ## BB#0:
688; AVX512BW-NEXT:    kmovd %edi, %k1
689; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
690; AVX512BW-NEXT:    retq
691;
692; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512:
693; AVX512F-32:       # BB#0:
694; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
695; AVX512F-32-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
696; AVX512F-32-NEXT:    retl
697  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
698  ret <32 x i16> %res
699}
700
; packssdw.512 with the second source loaded from %ptr_b, zeroinitializer
; pass-through, all-ones mask: expects the load to appear as the memory operand
; of an unmasked vpackssdw.
701define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
702; AVX512BW-LABEL: test_mask_packs_epi32_rm_512:
703; AVX512BW:       ## BB#0:
704; AVX512BW-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0
705; AVX512BW-NEXT:    retq
706;
707; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512:
708; AVX512F-32:       # BB#0:
709; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
710; AVX512F-32-NEXT:    vpackssdw (%eax), %zmm0, %zmm0
711; AVX512F-32-NEXT:    retl
712  %b = load <16 x i32>, <16 x i32>* %ptr_b
713  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
714  ret <32 x i16> %res
715}
716
; packssdw.512 with the second source loaded from %ptr_b and %passThru
; pass-through under %mask: expects a memory-operand vpackssdw writing
; %zmm1 {%k1}, then a vmovaps copy of %zmm1 into %zmm0.
717define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
718; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512:
719; AVX512BW:       ## BB#0:
720; AVX512BW-NEXT:    kmovd %esi, %k1
721; AVX512BW-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
722; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
723; AVX512BW-NEXT:    retq
724;
725; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
726; AVX512F-32:       # BB#0:
727; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
728; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
729; AVX512F-32-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1}
730; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
731; AVX512F-32-NEXT:    retl
732  %b = load <16 x i32>, <16 x i32>* %ptr_b
733  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
734  ret <32 x i16> %res
735}
736
; packssdw.512 with the second source loaded from %ptr_b and zeroinitializer
; pass-through under %mask: expects a memory-operand vpackssdw in zero-masking
; form ({%k1} {z}).
737define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
738; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512:
739; AVX512BW:       ## BB#0:
740; AVX512BW-NEXT:    kmovd %esi, %k1
741; AVX512BW-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
742; AVX512BW-NEXT:    retq
743;
744; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512:
745; AVX512F-32:       # BB#0:
746; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
747; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
748; AVX512F-32-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
749; AVX512F-32-NEXT:    retl
750  %b = load <16 x i32>, <16 x i32>* %ptr_b
751  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
752  ret <32 x i16> %res
753}
754
; packssdw.512 where the second source is one i32 loaded from %ptr_b and
; splatted to all 16 lanes (insertelement + shufflevector with a
; zeroinitializer shuffle mask); zeroinitializer pass-through, all-ones mask.
; Expects an unmasked vpackssdw with a {1to16} broadcast memory operand.
755define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
756; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512:
757; AVX512BW:       ## BB#0:
758; AVX512BW-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0
759; AVX512BW-NEXT:    retq
760;
761; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512:
762; AVX512F-32:       # BB#0:
763; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
764; AVX512F-32-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0
765; AVX512F-32-NEXT:    retl
766  %q = load i32, i32* %ptr_b
767  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
768  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
769  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
770  ret <32 x i16> %res
771}
772
; packssdw.512 with a splatted i32 loaded from %ptr_b as the second source and
; %passThru pass-through under %mask: expects a {1to16} broadcast vpackssdw
; writing %zmm1 {%k1}, then a vmovaps copy of %zmm1 into %zmm0.
773define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
774; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512:
775; AVX512BW:       ## BB#0:
776; AVX512BW-NEXT:    kmovd %esi, %k1
777; AVX512BW-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
778; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
779; AVX512BW-NEXT:    retq
780;
781; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
782; AVX512F-32:       # BB#0:
783; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
784; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
785; AVX512F-32-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
786; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
787; AVX512F-32-NEXT:    retl
788  %q = load i32, i32* %ptr_b
789  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
790  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
791  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
792  ret <32 x i16> %res
793}
794
; packssdw.512 with a splatted i32 loaded from %ptr_b as the second source and
; zeroinitializer pass-through under %mask: expects a {1to16} broadcast
; vpackssdw in zero-masking form ({%k1} {z}).
795define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
796; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512:
797; AVX512BW:       ## BB#0:
798; AVX512BW-NEXT:    kmovd %esi, %k1
799; AVX512BW-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
800; AVX512BW-NEXT:    retq
801;
802; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512:
803; AVX512F-32:       # BB#0:
804; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
805; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
806; AVX512F-32-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
807; AVX512F-32-NEXT:    retl
808  %q = load i32, i32* %ptr_b
809  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
810  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
811  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
812  ret <32 x i16> %res
813}
814
; Intrinsic used by the test_mask_packs_epi32_* tests above; they call it with
; two <16 x i32> sources, a <32 x i16> pass-through and an i32 mask.
815declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
816
; packsswb.512, register operands, zeroinitializer pass-through, all-ones i64
; mask (-1): both configurations expect a single unmasked vpacksswb.
817define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
818; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
819; AVX512BW:       ## BB#0:
820; AVX512BW-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0
821; AVX512BW-NEXT:    retq
822;
823; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512:
824; AVX512F-32:       # BB#0:
825; AVX512F-32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0
826; AVX512F-32-NEXT:    retl
827  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
828  ret <64 x i8> %res
829}
830
; packsswb.512, register operands, %passThru pass-through under an i64 %mask.
; The x86_64 checks expect one kmovq from %rdi; the i386 checks expect two
; kmovd stack loads combined with kunpckdq into %k1. Both then expect
; vpacksswb writing %zmm2 {%k1} and a vmovaps copy into %zmm0.
831define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
832; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512:
833; AVX512BW:       ## BB#0:
834; AVX512BW-NEXT:    kmovq %rdi, %k1
835; AVX512BW-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
836; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
837; AVX512BW-NEXT:    retq
838;
839; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
840; AVX512F-32:       # BB#0:
841; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
842; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
843; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
844; AVX512F-32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
845; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
846; AVX512F-32-NEXT:    retl
847  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
848  ret <64 x i8> %res
849}
850
; packsswb.512, register operands, zeroinitializer pass-through under an i64
; %mask: expects the zero-masking form ({%k1} {z}); the i386 checks build the
; mask from two kmovd stack loads joined by kunpckdq.
851define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
852; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512:
853; AVX512BW:       ## BB#0:
854; AVX512BW-NEXT:    kmovq %rdi, %k1
855; AVX512BW-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
856; AVX512BW-NEXT:    retq
857;
858; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
859; AVX512F-32:       # BB#0:
860; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
861; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
862; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
863; AVX512F-32-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
864; AVX512F-32-NEXT:    retl
865  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
866  ret <64 x i8> %res
867}
868
; packsswb.512 with the second source loaded from %ptr_b, zeroinitializer
; pass-through, all-ones mask: expects the load to appear as the memory operand
; of an unmasked vpacksswb.
869define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
870; AVX512BW-LABEL: test_mask_packs_epi16_rm_512:
871; AVX512BW:       ## BB#0:
872; AVX512BW-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0
873; AVX512BW-NEXT:    retq
874;
875; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512:
876; AVX512F-32:       # BB#0:
877; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
878; AVX512F-32-NEXT:    vpacksswb (%eax), %zmm0, %zmm0
879; AVX512F-32-NEXT:    retl
880  %b = load <32 x i16>, <32 x i16>* %ptr_b
881  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
882  ret <64 x i8> %res
883}
884
; packsswb.512 with the second source loaded from %ptr_b and %passThru
; pass-through under an i64 %mask: expects a memory-operand vpacksswb writing
; %zmm1 {%k1} and a vmovaps copy into %zmm0; the i386 checks build the mask
; from two kmovd stack loads joined by kunpckdq.
885define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
886; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512:
887; AVX512BW:       ## BB#0:
888; AVX512BW-NEXT:    kmovq %rsi, %k1
889; AVX512BW-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
890; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
891; AVX512BW-NEXT:    retq
892;
893; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
894; AVX512F-32:       # BB#0:
895; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
896; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
897; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
898; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
899; AVX512F-32-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1}
900; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
901; AVX512F-32-NEXT:    retl
902  %b = load <32 x i16>, <32 x i16>* %ptr_b
903  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
904  ret <64 x i8> %res
905}
906
; packsswb.512 with the second source loaded from %ptr_b and zeroinitializer
; pass-through under an i64 %mask: expects a memory-operand vpacksswb in
; zero-masking form ({%k1} {z}); the i386 checks build the mask with kunpckdq.
907define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
908; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512:
909; AVX512BW:       ## BB#0:
910; AVX512BW-NEXT:    kmovq %rsi, %k1
911; AVX512BW-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
912; AVX512BW-NEXT:    retq
913;
914; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
915; AVX512F-32:       # BB#0:
916; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
917; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
918; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
919; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
920; AVX512F-32-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
921; AVX512F-32-NEXT:    retl
922  %b = load <32 x i16>, <32 x i16>* %ptr_b
923  %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
924  ret <64 x i8> %res
925}
926
; Intrinsic used by the test_mask_packs_epi16_* tests above; they call it with
; two <32 x i16> sources, a <64 x i8> pass-through and an i64 mask.
927declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
928
929
; packusdw.512, register operands, zeroinitializer pass-through, all-ones mask
; (-1): both configurations expect a single unmasked vpackusdw.
930define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
931; AVX512BW-LABEL: test_mask_packus_epi32_rr_512:
932; AVX512BW:       ## BB#0:
933; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
934; AVX512BW-NEXT:    retq
935;
936; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512:
937; AVX512F-32:       # BB#0:
938; AVX512F-32-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
939; AVX512F-32-NEXT:    retl
940  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
941  ret <32 x i16> %res
942}
943
; packusdw.512, register operands, %passThru pass-through under %mask: expects
; the mask moved into %k1 with kmovd, vpackusdw writing %zmm2 {%k1}, then a
; vmovaps copy of %zmm2 into %zmm0.
944define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
945; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512:
946; AVX512BW:       ## BB#0:
947; AVX512BW-NEXT:    kmovd %edi, %k1
948; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
949; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
950; AVX512BW-NEXT:    retq
951;
952; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
953; AVX512F-32:       # BB#0:
954; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
955; AVX512F-32-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
956; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
957; AVX512F-32-NEXT:    retl
958  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
959  ret <32 x i16> %res
960}
961
; packusdw.512, register operands, zeroinitializer pass-through under %mask:
; expects the zero-masking form of vpackusdw ({%k1} {z}).
962define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
963; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512:
964; AVX512BW:       ## BB#0:
965; AVX512BW-NEXT:    kmovd %edi, %k1
966; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
967; AVX512BW-NEXT:    retq
968;
969; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512:
970; AVX512F-32:       # BB#0:
971; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
972; AVX512F-32-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
973; AVX512F-32-NEXT:    retl
974  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
975  ret <32 x i16> %res
976}
977
; packusdw.512 with the second source loaded from %ptr_b, zeroinitializer
; pass-through, all-ones mask: expects the load to appear as the memory operand
; of an unmasked vpackusdw.
978define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
979; AVX512BW-LABEL: test_mask_packus_epi32_rm_512:
980; AVX512BW:       ## BB#0:
981; AVX512BW-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0
982; AVX512BW-NEXT:    retq
983;
984; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512:
985; AVX512F-32:       # BB#0:
986; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
987; AVX512F-32-NEXT:    vpackusdw (%eax), %zmm0, %zmm0
988; AVX512F-32-NEXT:    retl
989  %b = load <16 x i32>, <16 x i32>* %ptr_b
990  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
991  ret <32 x i16> %res
992}
993
; packusdw.512 with the second source loaded from %ptr_b and %passThru
; pass-through under %mask: expects a memory-operand vpackusdw writing
; %zmm1 {%k1}, then a vmovaps copy of %zmm1 into %zmm0.
994define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
995; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512:
996; AVX512BW:       ## BB#0:
997; AVX512BW-NEXT:    kmovd %esi, %k1
998; AVX512BW-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
999; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1000; AVX512BW-NEXT:    retq
1001;
1002; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
1003; AVX512F-32:       # BB#0:
1004; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1005; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1006; AVX512F-32-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1}
1007; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1008; AVX512F-32-NEXT:    retl
1009  %b = load <16 x i32>, <16 x i32>* %ptr_b
1010  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1011  ret <32 x i16> %res
1012}
1013
; packusdw.512 with the second source loaded from %ptr_b and zeroinitializer
; pass-through under %mask: expects a memory-operand vpackusdw in zero-masking
; form ({%k1} {z}).
1014define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1015; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512:
1016; AVX512BW:       ## BB#0:
1017; AVX512BW-NEXT:    kmovd %esi, %k1
1018; AVX512BW-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
1019; AVX512BW-NEXT:    retq
1020;
1021; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512:
1022; AVX512F-32:       # BB#0:
1023; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1024; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1025; AVX512F-32-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
1026; AVX512F-32-NEXT:    retl
1027  %b = load <16 x i32>, <16 x i32>* %ptr_b
1028  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1029  ret <32 x i16> %res
1030}
1031
; packusdw.512 where the second source is one i32 loaded from %ptr_b and
; splatted to all 16 lanes (insertelement + shufflevector with a
; zeroinitializer shuffle mask); zeroinitializer pass-through, all-ones mask.
; Expects an unmasked vpackusdw with a {1to16} broadcast memory operand.
1032define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1033; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512:
1034; AVX512BW:       ## BB#0:
1035; AVX512BW-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0
1036; AVX512BW-NEXT:    retq
1037;
1038; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512:
1039; AVX512F-32:       # BB#0:
1040; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1041; AVX512F-32-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0
1042; AVX512F-32-NEXT:    retl
1043  %q = load i32, i32* %ptr_b
1044  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1045  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1046  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1047  ret <32 x i16> %res
1048}
1049
; packusdw.512 with a splatted i32 loaded from %ptr_b as the second source and
; %passThru pass-through under %mask: expects a {1to16} broadcast vpackusdw
; writing %zmm1 {%k1}, then a vmovaps copy of %zmm1 into %zmm0.
1050define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1051; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512:
1052; AVX512BW:       ## BB#0:
1053; AVX512BW-NEXT:    kmovd %esi, %k1
1054; AVX512BW-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1055; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1056; AVX512BW-NEXT:    retq
1057;
1058; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
1059; AVX512F-32:       # BB#0:
1060; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1061; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1062; AVX512F-32-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
1063; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1064; AVX512F-32-NEXT:    retl
1065  %q = load i32, i32* %ptr_b
1066  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1067  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1068  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1069  ret <32 x i16> %res
1070}
1071
; packusdw.512 with a splatted i32 loaded from %ptr_b as the second source and
; zeroinitializer pass-through under %mask: expects a {1to16} broadcast
; vpackusdw in zero-masking form ({%k1} {z}).
1072define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1073; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512:
1074; AVX512BW:       ## BB#0:
1075; AVX512BW-NEXT:    kmovd %esi, %k1
1076; AVX512BW-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
1077; AVX512BW-NEXT:    retq
1078;
1079; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512:
1080; AVX512F-32:       # BB#0:
1081; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1082; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1083; AVX512F-32-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1084; AVX512F-32-NEXT:    retl
1085  %q = load i32, i32* %ptr_b
1086  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1087  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1088  %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1089  ret <32 x i16> %res
1090}
1091
; Intrinsic used by the test_mask_packus_epi32_* tests above; they call it with
; two <16 x i32> sources, a <32 x i16> pass-through and an i32 mask.
1092declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1093
; packuswb.512, register operands, zeroinitializer pass-through, all-ones i64
; mask (-1): both configurations expect a single unmasked vpackuswb.
1094define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1095; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
1096; AVX512BW:       ## BB#0:
1097; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
1098; AVX512BW-NEXT:    retq
1099;
1100; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512:
1101; AVX512F-32:       # BB#0:
1102; AVX512F-32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
1103; AVX512F-32-NEXT:    retl
1104  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1105  ret <64 x i8> %res
1106}
1107
; packuswb.512, register operands, %passThru pass-through under an i64 %mask.
; The x86_64 checks expect one kmovq from %rdi; the i386 checks expect two
; kmovd stack loads combined with kunpckdq into %k1. Both then expect
; vpackuswb writing %zmm2 {%k1} and a vmovaps copy into %zmm0.
1108define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1109; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512:
1110; AVX512BW:       ## BB#0:
1111; AVX512BW-NEXT:    kmovq %rdi, %k1
1112; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1113; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1114; AVX512BW-NEXT:    retq
1115;
1116; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
1117; AVX512F-32:       # BB#0:
1118; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1119; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1120; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1121; AVX512F-32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1122; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1123; AVX512F-32-NEXT:    retl
1124  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1125  ret <64 x i8> %res
1126}
1127
; packuswb.512, register operands, zeroinitializer pass-through under an i64
; %mask: expects the zero-masking form ({%k1} {z}); the i386 checks build the
; mask from two kmovd stack loads joined by kunpckdq.
1128define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1129; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512:
1130; AVX512BW:       ## BB#0:
1131; AVX512BW-NEXT:    kmovq %rdi, %k1
1132; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1133; AVX512BW-NEXT:    retq
1134;
1135; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
1136; AVX512F-32:       # BB#0:
1137; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1138; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1139; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1140; AVX512F-32-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1141; AVX512F-32-NEXT:    retl
1142  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1143  ret <64 x i8> %res
1144}
1145
; packuswb.512 with the second source loaded from %ptr_b, zeroinitializer
; pass-through, all-ones mask: expects the load to appear as the memory operand
; of an unmasked vpackuswb.
1146define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1147; AVX512BW-LABEL: test_mask_packus_epi16_rm_512:
1148; AVX512BW:       ## BB#0:
1149; AVX512BW-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0
1150; AVX512BW-NEXT:    retq
1151;
1152; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512:
1153; AVX512F-32:       # BB#0:
1154; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1155; AVX512F-32-NEXT:    vpackuswb (%eax), %zmm0, %zmm0
1156; AVX512F-32-NEXT:    retl
1157  %b = load <32 x i16>, <32 x i16>* %ptr_b
1158  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1159  ret <64 x i8> %res
1160}
1161
; packuswb.512 with the second source loaded from %ptr_b and %passThru
; pass-through under an i64 %mask: expects a memory-operand vpackuswb writing
; %zmm1 {%k1} and a vmovaps copy into %zmm0; the i386 checks build the mask
; from two kmovd stack loads joined by kunpckdq.
1162define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1163; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512:
1164; AVX512BW:       ## BB#0:
1165; AVX512BW-NEXT:    kmovq %rsi, %k1
1166; AVX512BW-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
1167; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1168; AVX512BW-NEXT:    retq
1169;
1170; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
1171; AVX512F-32:       # BB#0:
1172; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1173; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1174; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1175; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1176; AVX512F-32-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1}
1177; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1178; AVX512F-32-NEXT:    retl
1179  %b = load <32 x i16>, <32 x i16>* %ptr_b
1180  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1181  ret <64 x i8> %res
1182}
1183
; packuswb.512 with the second source loaded from %ptr_b and zeroinitializer
; pass-through under an i64 %mask: expects a memory-operand vpackuswb in
; zero-masking form ({%k1} {z}); the i386 checks build the mask with kunpckdq.
1184define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1185; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512:
1186; AVX512BW:       ## BB#0:
1187; AVX512BW-NEXT:    kmovq %rsi, %k1
1188; AVX512BW-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
1189; AVX512BW-NEXT:    retq
1190;
1191; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
1192; AVX512F-32:       # BB#0:
1193; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1194; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1195; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1196; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1197; AVX512F-32-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
1198; AVX512F-32-NEXT:    retl
1199  %b = load <32 x i16>, <32 x i16>* %ptr_b
1200  %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1201  ret <64 x i8> %res
1202}
1203
; Intrinsic used by the test_mask_packus_epi16_* tests above; they call it with
; two <32 x i16> sources, a <64 x i8> pass-through and an i64 mask.
1204declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1205
; padds.w.512, register operands, zeroinitializer pass-through, all-ones mask
; (-1): both configurations expect a single unmasked vpaddsw.
1206define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1207; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
1208; AVX512BW:       ## BB#0:
1209; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
1210; AVX512BW-NEXT:    retq
1211;
1212; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
1213; AVX512F-32:       # BB#0:
1214; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
1215; AVX512F-32-NEXT:    retl
1216  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1217  ret <32 x i16> %res
1218}
1219
; padds.w.512, register operands, %passThru pass-through under %mask: expects
; the mask moved into %k1 with kmovd, vpaddsw writing %zmm2 {%k1}, then a
; vmovaps copy of %zmm2 into %zmm0.
1220define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1221; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
1222; AVX512BW:       ## BB#0:
1223; AVX512BW-NEXT:    kmovd %edi, %k1
1224; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1225; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1226; AVX512BW-NEXT:    retq
1227;
1228; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
1229; AVX512F-32:       # BB#0:
1230; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1231; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1232; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1233; AVX512F-32-NEXT:    retl
1234  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1235  ret <32 x i16> %res
1236}
1237
; padds.w.512, register operands, zeroinitializer pass-through under %mask:
; expects the zero-masking form of vpaddsw ({%k1} {z}).
1238define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1239; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
1240; AVX512BW:       ## BB#0:
1241; AVX512BW-NEXT:    kmovd %edi, %k1
1242; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1243; AVX512BW-NEXT:    retq
1244;
1245; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
1246; AVX512F-32:       # BB#0:
1247; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1248; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1249; AVX512F-32-NEXT:    retl
1250  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1251  ret <32 x i16> %res
1252}
1253
; padds.w.512 with the second source loaded from %ptr_b, zeroinitializer
; pass-through, all-ones mask: expects the load to appear as the memory operand
; of an unmasked vpaddsw.
1254define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1255; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
1256; AVX512BW:       ## BB#0:
1257; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0
1258; AVX512BW-NEXT:    retq
1259;
1260; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
1261; AVX512F-32:       # BB#0:
1262; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1263; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm0
1264; AVX512F-32-NEXT:    retl
1265  %b = load <32 x i16>, <32 x i16>* %ptr_b
1266  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1267  ret <32 x i16> %res
1268}
1269
; padds.w.512 with the second source loaded from %ptr_b and %passThru
; pass-through under %mask: expects a memory-operand vpaddsw writing
; %zmm1 {%k1}, then a vmovaps copy of %zmm1 into %zmm0.
1270define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1271; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
1272; AVX512BW:       ## BB#0:
1273; AVX512BW-NEXT:    kmovd %esi, %k1
1274; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
1275; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1276; AVX512BW-NEXT:    retq
1277;
1278; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
1279; AVX512F-32:       # BB#0:
1280; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1281; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1282; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1}
1283; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1284; AVX512F-32-NEXT:    retl
1285  %b = load <32 x i16>, <32 x i16>* %ptr_b
1286  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1287  ret <32 x i16> %res
1288}
1289
; padds.w.512 with the second source loaded from %ptr_b and zeroinitializer
; pass-through under %mask: expects a memory-operand vpaddsw in zero-masking
; form ({%k1} {z}).
1290define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1291; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
1292; AVX512BW:       ## BB#0:
1293; AVX512BW-NEXT:    kmovd %esi, %k1
1294; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1295; AVX512BW-NEXT:    retq
1296;
1297; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
1298; AVX512F-32:       # BB#0:
1299; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1300; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1301; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
1302; AVX512F-32-NEXT:    retl
1303  %b = load <32 x i16>, <32 x i16>* %ptr_b
1304  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1305  ret <32 x i16> %res
1306}
1307
; Intrinsic used by the test_mask_adds_epi16_* tests above; they call it with
; two <32 x i16> sources, a <32 x i16> pass-through and an i32 mask.
1308declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1309
; psubs.w.512, register operands, zeroinitializer pass-through, all-ones mask
; (-1): both configurations expect a single unmasked vpsubsw.
1310define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1311; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
1312; AVX512BW:       ## BB#0:
1313; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
1314; AVX512BW-NEXT:    retq
1315;
1316; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
1317; AVX512F-32:       # BB#0:
1318; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
1319; AVX512F-32-NEXT:    retl
1320  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1321  ret <32 x i16> %res
1322}
1323
; psubs.w.512, register operands, %passThru pass-through under %mask: expects
; the mask moved into %k1 with kmovd, vpsubsw writing %zmm2 {%k1}, then a
; vmovaps copy of %zmm2 into %zmm0.
1324define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1325; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
1326; AVX512BW:       ## BB#0:
1327; AVX512BW-NEXT:    kmovd %edi, %k1
1328; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1329; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1330; AVX512BW-NEXT:    retq
1331;
1332; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
1333; AVX512F-32:       # BB#0:
1334; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1335; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1336; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1337; AVX512F-32-NEXT:    retl
1338  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1339  ret <32 x i16> %res
1340}
1341
; psubs.w.512, register operands, zeroinitializer pass-through under %mask:
; expects the zero-masking form of vpsubsw ({%k1} {z}).
1342define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1343; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
1344; AVX512BW:       ## BB#0:
1345; AVX512BW-NEXT:    kmovd %edi, %k1
1346; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1347; AVX512BW-NEXT:    retq
1348;
1349; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
1350; AVX512F-32:       # BB#0:
1351; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1352; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1353; AVX512F-32-NEXT:    retl
1354  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1355  ret <32 x i16> %res
1356}
1357
; Register-memory form: %b is loaded from %ptr_b, and the call passes a
; zeroinitializer third operand with the constant mask -1. The expected code
; uses the load as the memory operand of an unmasked vpsubsw.
1358define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1359; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
1360; AVX512BW:       ## BB#0:
1361; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0
1362; AVX512BW-NEXT:    retq
1363;
1364; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
1365; AVX512F-32:       # BB#0:
1366; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1367; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm0
1368; AVX512F-32-NEXT:    retl
1369  %b = load <32 x i16>, <32 x i16>* %ptr_b
1370  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1371  ret <32 x i16> %res
1372}
1373
; Register-memory, merge-masked form: %b is loaded from %ptr_b and the call
; forwards %passThru and the variable %mask. The expected code uses the memory
; operand directly, writes the vpsubsw result into %zmm1 under {%k1}, and then
; copies %zmm1 to %zmm0.
1374define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1375; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
1376; AVX512BW:       ## BB#0:
1377; AVX512BW-NEXT:    kmovd %esi, %k1
1378; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
1379; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1380; AVX512BW-NEXT:    retq
1381;
1382; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
1383; AVX512F-32:       # BB#0:
1384; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1385; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1386; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1}
1387; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1388; AVX512F-32-NEXT:    retl
1389  %b = load <32 x i16>, <32 x i16>* %ptr_b
1390  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1391  ret <32 x i16> %res
1392}
1393
; Register-memory form with a variable %mask and a zeroinitializer third
; operand: %b is loaded from %ptr_b. The expected code is the zeroing-masked
; ({%k1} {z}) vpsubsw with a memory source, writing directly into %zmm0.
1394define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1395; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
1396; AVX512BW:       ## BB#0:
1397; AVX512BW-NEXT:    kmovd %esi, %k1
1398; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1399; AVX512BW-NEXT:    retq
1400;
1401; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
1402; AVX512F-32:       # BB#0:
1403; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1404; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1405; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
1406; AVX512F-32-NEXT:    retl
1407  %b = load <32 x i16>, <32 x i16>* %ptr_b
1408  %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1409  ret <32 x i16> %res
1410}
1411
; Intrinsic exercised by the test_mask_subs_epi16_* functions: two <32 x i16>
; sources, a third <32 x i16> operand (the callers pass %passThru or
; zeroinitializer) and an i32 mask.
1412declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1413
; Register-register form with a zeroinitializer third operand and the constant
; mask -1. The expected code is a single unmasked vpaddusw.
1414define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1415; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
1416; AVX512BW:       ## BB#0:
1417; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
1418; AVX512BW-NEXT:    retq
1419;
1420; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
1421; AVX512F-32:       # BB#0:
1422; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
1423; AVX512F-32-NEXT:    retl
1424  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1425  ret <32 x i16> %res
1426}
1427
; Register-register, merge-masked form: the call forwards %passThru and the
; variable %mask. The expected code moves the mask into %k1, writes the
; vpaddusw result into %zmm2 under {%k1}, and then copies %zmm2 to %zmm0.
1428define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1429; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
1430; AVX512BW:       ## BB#0:
1431; AVX512BW-NEXT:    kmovd %edi, %k1
1432; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1433; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1434; AVX512BW-NEXT:    retq
1435;
1436; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
1437; AVX512F-32:       # BB#0:
1438; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1439; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1440; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1441; AVX512F-32-NEXT:    retl
1442  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1443  ret <32 x i16> %res
1444}
1445
; Register-register form with a variable %mask and a zeroinitializer third
; operand. The expected code uses the zeroing-masked encoding ({%k1} {z}) of
; vpaddusw and writes directly into %zmm0.
1446define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1447; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
1448; AVX512BW:       ## BB#0:
1449; AVX512BW-NEXT:    kmovd %edi, %k1
1450; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1451; AVX512BW-NEXT:    retq
1452;
1453; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
1454; AVX512F-32:       # BB#0:
1455; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1456; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1457; AVX512F-32-NEXT:    retl
1458  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1459  ret <32 x i16> %res
1460}
1461
; Register-memory form: %b is loaded from %ptr_b, and the call passes a
; zeroinitializer third operand with the constant mask -1. The expected code
; uses the load as the memory operand of an unmasked vpaddusw.
1462define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1463; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
1464; AVX512BW:       ## BB#0:
1465; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0
1466; AVX512BW-NEXT:    retq
1467;
1468; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
1469; AVX512F-32:       # BB#0:
1470; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1471; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm0
1472; AVX512F-32-NEXT:    retl
1473  %b = load <32 x i16>, <32 x i16>* %ptr_b
1474  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1475  ret <32 x i16> %res
1476}
1477
; Register-memory, merge-masked form: %b is loaded from %ptr_b and the call
; forwards %passThru and the variable %mask. The expected code uses the memory
; operand directly, writes the vpaddusw result into %zmm1 under {%k1}, and
; then copies %zmm1 to %zmm0.
1478define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1479; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
1480; AVX512BW:       ## BB#0:
1481; AVX512BW-NEXT:    kmovd %esi, %k1
1482; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
1483; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1484; AVX512BW-NEXT:    retq
1485;
1486; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
1487; AVX512F-32:       # BB#0:
1488; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1489; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1490; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm1 {%k1}
1491; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1492; AVX512F-32-NEXT:    retl
1493  %b = load <32 x i16>, <32 x i16>* %ptr_b
1494  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1495  ret <32 x i16> %res
1496}
1497
; Register-memory form with a variable %mask and a zeroinitializer third
; operand: %b is loaded from %ptr_b. The expected code is the zeroing-masked
; ({%k1} {z}) vpaddusw with a memory source, writing directly into %zmm0.
1498define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1499; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
1500; AVX512BW:       ## BB#0:
1501; AVX512BW-NEXT:    kmovd %esi, %k1
1502; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1503; AVX512BW-NEXT:    retq
1504;
1505; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
1506; AVX512F-32:       # BB#0:
1507; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1508; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1509; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
1510; AVX512F-32-NEXT:    retl
1511  %b = load <32 x i16>, <32 x i16>* %ptr_b
1512  %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1513  ret <32 x i16> %res
1514}
1515
; Intrinsic exercised by the test_mask_adds_epu16_* functions: two <32 x i16>
; sources, a third <32 x i16> operand (the callers pass %passThru or
; zeroinitializer) and an i32 mask.
1516declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1517
; Register-register form with a zeroinitializer third operand and the constant
; mask -1. The expected code is a single unmasked vpsubusw.
1518define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1519; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
1520; AVX512BW:       ## BB#0:
1521; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
1522; AVX512BW-NEXT:    retq
1523;
1524; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
1525; AVX512F-32:       # BB#0:
1526; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
1527; AVX512F-32-NEXT:    retl
1528  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1529  ret <32 x i16> %res
1530}
1531
; Register-register, merge-masked form: the call forwards %passThru and the
; variable %mask. The expected code moves the mask into %k1, writes the
; vpsubusw result into %zmm2 under {%k1}, and then copies %zmm2 to %zmm0.
1532define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1533; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
1534; AVX512BW:       ## BB#0:
1535; AVX512BW-NEXT:    kmovd %edi, %k1
1536; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1537; AVX512BW-NEXT:    vmovaps %zmm2, %zmm0
1538; AVX512BW-NEXT:    retq
1539;
1540; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
1541; AVX512F-32:       # BB#0:
1542; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1543; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1544; AVX512F-32-NEXT:    vmovaps %zmm2, %zmm0
1545; AVX512F-32-NEXT:    retl
1546  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1547  ret <32 x i16> %res
1548}
1549
; Register-register form with a variable %mask and a zeroinitializer third
; operand. The expected code uses the zeroing-masked encoding ({%k1} {z}) of
; vpsubusw and writes directly into %zmm0.
1550define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1551; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
1552; AVX512BW:       ## BB#0:
1553; AVX512BW-NEXT:    kmovd %edi, %k1
1554; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1555; AVX512BW-NEXT:    retq
1556;
1557; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
1558; AVX512F-32:       # BB#0:
1559; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1560; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1561; AVX512F-32-NEXT:    retl
1562  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1563  ret <32 x i16> %res
1564}
1565
; Register-memory form: %b is loaded from %ptr_b, and the call passes a
; zeroinitializer third operand with the constant mask -1. The expected code
; uses the load as the memory operand of an unmasked vpsubusw.
1566define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1567; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
1568; AVX512BW:       ## BB#0:
1569; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0
1570; AVX512BW-NEXT:    retq
1571;
1572; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
1573; AVX512F-32:       # BB#0:
1574; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1575; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm0
1576; AVX512F-32-NEXT:    retl
1577  %b = load <32 x i16>, <32 x i16>* %ptr_b
1578  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1579  ret <32 x i16> %res
1580}
1581
; Register-memory, merge-masked form: %b is loaded from %ptr_b and the call
; forwards %passThru and the variable %mask. The expected code uses the memory
; operand directly, writes the vpsubusw result into %zmm1 under {%k1}, and
; then copies %zmm1 to %zmm0.
1582define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1583; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
1584; AVX512BW:       ## BB#0:
1585; AVX512BW-NEXT:    kmovd %esi, %k1
1586; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
1587; AVX512BW-NEXT:    vmovaps %zmm1, %zmm0
1588; AVX512BW-NEXT:    retq
1589;
1590; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
1591; AVX512F-32:       # BB#0:
1592; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1593; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1594; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm1 {%k1}
1595; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
1596; AVX512F-32-NEXT:    retl
1597  %b = load <32 x i16>, <32 x i16>* %ptr_b
1598  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1599  ret <32 x i16> %res
1600}
1601
; Register-memory form with a variable %mask and a zeroinitializer third
; operand: %b is loaded from %ptr_b. The expected code is the zeroing-masked
; ({%k1} {z}) vpsubusw with a memory source, writing directly into %zmm0.
1602define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1603; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
1604; AVX512BW:       ## BB#0:
1605; AVX512BW-NEXT:    kmovd %esi, %k1
1606; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1607; AVX512BW-NEXT:    retq
1608;
1609; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
1610; AVX512F-32:       # BB#0:
1611; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1612; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1613; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
1614; AVX512F-32-NEXT:    retl
1615  %b = load <32 x i16>, <32 x i16>* %ptr_b
1616  %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1617  ret <32 x i16> %res
1618}
1619
; Intrinsic exercised by the test_mask_subs_epu16_* functions: two <32 x i16>
; sources, a third <32 x i16> operand (the callers pass %passThru or
; zeroinitializer) and an i32 mask.
1620declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1621
; Intrinsic under test: three <64 x i8> operands followed by an i64 mask.
1622declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1623
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpmaxsb writing %zmm2 and an
; unmasked vpmaxsb, joined by vpaddb. In the 32-bit expectations the mask is
; built from two kmovd stack loads combined with kunpckdq.
1624define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1625; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1626; AVX512BW:       ## BB#0:
1627; AVX512BW-NEXT:    kmovq %rdi, %k1
1628; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1629; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
1630; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1631; AVX512BW-NEXT:    retq
1632;
1633; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1634; AVX512F-32:       # BB#0:
1635; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1636; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1637; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1638; AVX512F-32-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1639; AVX512F-32-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
1640; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1641; AVX512F-32-NEXT:    retl
1642  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1643  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1644  %res2 = add <64 x i8> %res, %res1
1645  ret <64 x i8> %res2
1646}
1647
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1648declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1649
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpmaxsw writing %zmm2 and an
; unmasked vpmaxsw, joined by vpaddw.
1650define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1651; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1652; AVX512BW:       ## BB#0:
1653; AVX512BW-NEXT:    kmovd %edi, %k1
1654; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1655; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
1656; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1657; AVX512BW-NEXT:    retq
1658;
1659; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1660; AVX512F-32:       # BB#0:
1661; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1662; AVX512F-32-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1663; AVX512F-32-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
1664; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1665; AVX512F-32-NEXT:    retl
1666  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1667  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1668  %res2 = add <32 x i16> %res, %res1
1669  ret <32 x i16> %res2
1670}
1671
; Intrinsic under test: three <64 x i8> operands followed by an i64 mask.
1672declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1673
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpmaxub writing %zmm2 and an
; unmasked vpmaxub, joined by vpaddb. In the 32-bit expectations the mask is
; built from two kmovd stack loads combined with kunpckdq.
1674define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1675; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1676; AVX512BW:       ## BB#0:
1677; AVX512BW-NEXT:    kmovq %rdi, %k1
1678; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1679; AVX512BW-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
1680; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1681; AVX512BW-NEXT:    retq
1682;
1683; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1684; AVX512F-32:       # BB#0:
1685; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1686; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1687; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1688; AVX512F-32-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1689; AVX512F-32-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
1690; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1691; AVX512F-32-NEXT:    retl
1692  %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1693  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1694  %res2 = add <64 x i8> %res, %res1
1695  ret <64 x i8> %res2
1696}
1697
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1698declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1699
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpmaxuw writing %zmm2 and an
; unmasked vpmaxuw, joined by vpaddw.
1700define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1701; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1702; AVX512BW:       ## BB#0:
1703; AVX512BW-NEXT:    kmovd %edi, %k1
1704; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1705; AVX512BW-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
1706; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1707; AVX512BW-NEXT:    retq
1708;
1709; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1710; AVX512F-32:       # BB#0:
1711; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1712; AVX512F-32-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1713; AVX512F-32-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
1714; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1715; AVX512F-32-NEXT:    retl
1716  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1717  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1718  %res2 = add <32 x i16> %res, %res1
1719  ret <32 x i16> %res2
1720}
1721
; Intrinsic under test: three <64 x i8> operands followed by an i64 mask.
1722declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1723
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpminsb writing %zmm2 and an
; unmasked vpminsb, joined by vpaddb. In the 32-bit expectations the mask is
; built from two kmovd stack loads combined with kunpckdq.
1724define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1725; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1726; AVX512BW:       ## BB#0:
1727; AVX512BW-NEXT:    kmovq %rdi, %k1
1728; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1729; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm0
1730; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1731; AVX512BW-NEXT:    retq
1732;
1733; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1734; AVX512F-32:       # BB#0:
1735; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1736; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1737; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1738; AVX512F-32-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1739; AVX512F-32-NEXT:    vpminsb %zmm1, %zmm0, %zmm0
1740; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1741; AVX512F-32-NEXT:    retl
1742  %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1743  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1744  %res2 = add <64 x i8> %res, %res1
1745  ret <64 x i8> %res2
1746}
1747
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1748declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1749
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpminsw writing %zmm2 and an
; unmasked vpminsw, joined by vpaddw.
1750define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1751; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1752; AVX512BW:       ## BB#0:
1753; AVX512BW-NEXT:    kmovd %edi, %k1
1754; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1755; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm0
1756; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1757; AVX512BW-NEXT:    retq
1758;
1759; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1760; AVX512F-32:       # BB#0:
1761; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1762; AVX512F-32-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1763; AVX512F-32-NEXT:    vpminsw %zmm1, %zmm0, %zmm0
1764; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1765; AVX512F-32-NEXT:    retl
1766  %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1767  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1768  %res2 = add <32 x i16> %res, %res1
1769  ret <32 x i16> %res2
1770}
1771
; Intrinsic under test: three <64 x i8> operands followed by an i64 mask.
1772declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1773
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpminub writing %zmm2 and an
; unmasked vpminub, joined by vpaddb. In the 32-bit expectations the mask is
; built from two kmovd stack loads combined with kunpckdq.
1774define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1775; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1776; AVX512BW:       ## BB#0:
1777; AVX512BW-NEXT:    kmovq %rdi, %k1
1778; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1}
1779; AVX512BW-NEXT:    vpminub %zmm1, %zmm0, %zmm0
1780; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1781; AVX512BW-NEXT:    retq
1782;
1783; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1784; AVX512F-32:       # BB#0:
1785; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1786; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1787; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1788; AVX512F-32-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1}
1789; AVX512F-32-NEXT:    vpminub %zmm1, %zmm0, %zmm0
1790; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1791; AVX512F-32-NEXT:    retl
1792  %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1793  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1794  %res2 = add <64 x i8> %res, %res1
1795  ret <64 x i8> %res2
1796}
1797
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1798declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1799
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpminuw writing %zmm2 and an
; unmasked vpminuw, joined by vpaddw.
1800define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1801; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512:
1802; AVX512BW:       ## BB#0:
1803; AVX512BW-NEXT:    kmovd %edi, %k1
1804; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1}
1805; AVX512BW-NEXT:    vpminuw %zmm1, %zmm0, %zmm0
1806; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1807; AVX512BW-NEXT:    retq
1808;
1809; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512:
1810; AVX512F-32:       # BB#0:
1811; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1812; AVX512F-32-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1}
1813; AVX512F-32-NEXT:    vpminuw %zmm1, %zmm0, %zmm0
1814; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1815; AVX512F-32-NEXT:    retl
1816  %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1817  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1818  %res2 = add <32 x i16> %res, %res1
1819  ret <32 x i16> %res2
1820}
1821
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1822declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1823
; Calls the intrinsic with the variable mask %x3 and again with the constant
; mask -1, then adds the results. The expected code copies %zmm1 into %zmm3,
; applies the {%k1}-masked vpermt2w to the copy and the unmasked vpermt2w to
; %zmm1 itself, and sums the two with vpaddw.
1824define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1825; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
1826; AVX512BW:       ## BB#0:
1827; AVX512BW-NEXT:    kmovd %edi, %k1
1828; AVX512BW-NEXT:    vmovaps %zmm1, %zmm3
1829; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1830; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
1831; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
1832; AVX512BW-NEXT:    retq
1833;
1834; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
1835; AVX512F-32:       # BB#0:
1836; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1837; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm3
1838; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1839; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
1840; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
1841; AVX512F-32-NEXT:    retl
1842  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1843  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1844  %res2 = add <32 x i16> %res, %res1
1845  ret <32 x i16> %res2
1846}
1847
; Intrinsic under test (maskz variant): three <32 x i16> operands followed by
; an i32 mask.
1848declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1849
; Calls the maskz intrinsic with the variable mask %x3 and again with the
; constant mask -1, then adds the results. The expected code copies %zmm1 into
; %zmm3, applies the zeroing-masked ({%k1} {z}) vpermt2w to the copy and the
; unmasked vpermt2w to %zmm1 itself, and sums the two with vpaddw.
1850define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1851; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
1852; AVX512BW:       ## BB#0:
1853; AVX512BW-NEXT:    kmovd %edi, %k1
1854; AVX512BW-NEXT:    vmovaps %zmm1, %zmm3
1855; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1856; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
1857; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
1858; AVX512BW-NEXT:    retq
1859;
1860; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
1861; AVX512F-32:       # BB#0:
1862; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1863; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm3
1864; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1865; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1
1866; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
1867; AVX512F-32-NEXT:    retl
1868  %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1869  %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1870  %res2 = add <32 x i16> %res, %res1
1871  ret <32 x i16> %res2
1872}
1873
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1874declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1875
; Calls the intrinsic with the variable mask %x3 and again with the constant
; mask -1, then adds the results. The expected code copies %zmm1 into %zmm3,
; applies the {%k1}-masked vpermi2w to the copy and the unmasked vpermi2w to
; %zmm1 itself, and sums the two with vpaddw.
1876define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1877; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
1878; AVX512BW:       ## BB#0:
1879; AVX512BW-NEXT:    kmovd %edi, %k1
1880; AVX512BW-NEXT:    vmovaps %zmm1, %zmm3
1881; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1882; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
1883; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
1884; AVX512BW-NEXT:    retq
1885;
1886; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
1887; AVX512F-32:       # BB#0:
1888; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1889; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm3
1890; AVX512F-32-NEXT:    vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1891; AVX512F-32-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
1892; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
1893; AVX512F-32-NEXT:    retl
1894  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1895  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1896  %res2 = add <32 x i16> %res, %res1
1897  ret <32 x i16> %res2
1898}
1899
; Intrinsic under test: three <64 x i8> operands followed by an i64 mask.
1900declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1901
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpavgb writing %zmm2 and an
; unmasked vpavgb, joined by vpaddb. In the 32-bit expectations the mask is
; built from two kmovd stack loads combined with kunpckdq.
1902define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1903; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512:
1904; AVX512BW:       ## BB#0:
1905; AVX512BW-NEXT:    kmovq %rdi, %k1
1906; AVX512BW-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1}
1907; AVX512BW-NEXT:    vpavgb %zmm1, %zmm0, %zmm0
1908; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1909; AVX512BW-NEXT:    retq
1910;
1911; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512:
1912; AVX512F-32:       # BB#0:
1913; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1914; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1915; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1916; AVX512F-32-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1}
1917; AVX512F-32-NEXT:    vpavgb %zmm1, %zmm0, %zmm0
1918; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1919; AVX512F-32-NEXT:    retl
1920  %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1921  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1922  %res2 = add <64 x i8> %res, %res1
1923  ret <64 x i8> %res2
1924}
1925
; Intrinsic under test: three <32 x i16> operands followed by an i32 mask.
1926declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1927
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpavgw writing %zmm2 and an
; unmasked vpavgw, joined by vpaddw.
1928define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1929; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512:
1930; AVX512BW:       ## BB#0:
1931; AVX512BW-NEXT:    kmovd %edi, %k1
1932; AVX512BW-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
1933; AVX512BW-NEXT:    vpavgw %zmm1, %zmm0, %zmm0
1934; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1935; AVX512BW-NEXT:    retq
1936;
1937; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
1938; AVX512F-32:       # BB#0:
1939; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1940; AVX512F-32-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
1941; AVX512F-32-NEXT:    vpavgw %zmm1, %zmm0, %zmm0
1942; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
1943; AVX512F-32-NEXT:    retl
1944  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1945  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1946  %res2 = add <32 x i16> %res, %res1
1947  ret <32 x i16> %res2
1948}
1949
; Intrinsic under test: three <64 x i8> operands followed by an i64 mask.
1950declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1951
; Calls the intrinsic twice with the same vector operands, once with the
; variable mask %x3 and once with the constant mask -1, and adds the results.
; The expected code therefore has a {%k1}-masked vpshufb writing %zmm2 and an
; unmasked vpshufb, joined by vpaddb. In the 32-bit expectations the mask is
; built from two kmovd stack loads combined with kunpckdq.
1952define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1953; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1954; AVX512BW:       ## BB#0:
1955; AVX512BW-NEXT:    kmovq %rdi, %k1
1956; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1957; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
1958; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1959; AVX512BW-NEXT:    retq
1960;
1961; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1962; AVX512F-32:       # BB#0:
1963; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
1964; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1965; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
1966; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1967; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
1968; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
1969; AVX512F-32-NEXT:    retl
1970  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1971  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1972  %res2 = add <64 x i8> %res, %res1
1973  ret <64 x i8> %res2
1974}
1975
; Intrinsic under test: two <32 x i16> operands followed by an i32 mask.
1976declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
1977
; Calls the intrinsic with the variable mask %x2 and again with the constant
; mask -1, then adds the results. The expected code has a {%k1}-masked vpabsw
; of %zmm0 writing %zmm1 and an unmasked vpabsw of %zmm0, joined by vpaddw.
1978define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
1979; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
1980; AVX512BW:       ## BB#0:
1981; AVX512BW-NEXT:    kmovd %edi, %k1
1982; AVX512BW-NEXT:    vpabsw %zmm0, %zmm1 {%k1}
1983; AVX512BW-NEXT:    vpabsw %zmm0, %zmm0
1984; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
1985; AVX512BW-NEXT:    retq
1986;
1987; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
1988; AVX512F-32:       # BB#0:
1989; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
1990; AVX512F-32-NEXT:    vpabsw %zmm0, %zmm1 {%k1}
1991; AVX512F-32-NEXT:    vpabsw %zmm0, %zmm0
1992; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
1993; AVX512F-32-NEXT:    retl
1994  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
1995  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
1996  %res2 = add <32 x i16> %res, %res1
1997  ret <32 x i16> %res2
1998}
1999
; Intrinsic under test: two <64 x i8> operands followed by an i64 mask.
2000declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
2001
; Calls the intrinsic with the variable mask %x2 and again with the constant
; mask -1, then adds the results. The expected code has a {%k1}-masked vpabsb
; of %zmm0 writing %zmm1 and an unmasked vpabsb of %zmm0, joined by vpaddb.
; In the 32-bit expectations the mask is built from two kmovd stack loads
; combined with kunpckdq.
2002define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2003; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2004; AVX512BW:       ## BB#0:
2005; AVX512BW-NEXT:    kmovq %rdi, %k1
2006; AVX512BW-NEXT:    vpabsb %zmm0, %zmm1 {%k1}
2007; AVX512BW-NEXT:    vpabsb %zmm0, %zmm0
2008; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
2009; AVX512BW-NEXT:    retq
2010;
2011; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2012; AVX512F-32:       # BB#0:
2013; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2014; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2015; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2016; AVX512F-32-NEXT:    vpabsb %zmm0, %zmm1 {%k1}
2017; AVX512F-32-NEXT:    vpabsb %zmm0, %zmm0
2018; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
2019; AVX512F-32-NEXT:    retl
2020  %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2021  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2022  %res2 = add <64 x i8> %res, %res1
2023  ret <64 x i8> %res2
2024}
2025
; Test: llvm.x86.avx512.mask.pmulhu.w.512 (two <32 x i16> sources, a
; <32 x i16> passthrough %x2 and an i32 mask %x3).
; Called once with the variable mask and once with an all-ones mask (i32 -1);
; the results are added so both calls stay live. The checks expect a masked
; vpmulhuw ({%k1}) followed by an unmasked vpmulhuw.
2026declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2027
2028define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2029; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2030; AVX512BW:       ## BB#0:
2031; AVX512BW-NEXT:    kmovd %edi, %k1
2032; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2033; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
2034; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2035; AVX512BW-NEXT:    retq
2036;
2037; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2038; AVX512F-32:       # BB#0:
2039; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2040; AVX512F-32-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2041; AVX512F-32-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
2042; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2043; AVX512F-32-NEXT:    retl
2044  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2045  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2046  %res2 = add <32 x i16> %res, %res1
2047  ret <32 x i16> %res2
2048}
2049
; Test: llvm.x86.avx512.mask.pmulh.w.512 (two <32 x i16> sources, a
; <32 x i16> passthrough %x2 and an i32 mask %x3).
; Called once with the variable mask and once with an all-ones mask (i32 -1);
; the results are added so both calls stay live. The checks expect a masked
; vpmulhw ({%k1}) followed by an unmasked vpmulhw.
2050declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2051
2052define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2053; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2054; AVX512BW:       ## BB#0:
2055; AVX512BW-NEXT:    kmovd %edi, %k1
2056; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2057; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
2058; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2059; AVX512BW-NEXT:    retq
2060;
2061; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2062; AVX512F-32:       # BB#0:
2063; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2064; AVX512F-32-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2065; AVX512F-32-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
2066; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2067; AVX512F-32-NEXT:    retl
2068  %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2069  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2070  %res2 = add <32 x i16> %res, %res1
2071  ret <32 x i16> %res2
2072}
2073
; Test: llvm.x86.avx512.mask.pmul.hr.sw.512 (two <32 x i16> sources, a
; <32 x i16> passthrough %x2 and an i32 mask %x3).
; Called once with the variable mask and once with an all-ones mask (i32 -1);
; the results are added so both calls stay live. The checks expect a masked
; vpmulhrsw ({%k1}) followed by an unmasked vpmulhrsw.
2074declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2075
2076define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2077; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2078; AVX512BW:       ## BB#0:
2079; AVX512BW-NEXT:    kmovd %edi, %k1
2080; AVX512BW-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2081; AVX512BW-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0
2082; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2083; AVX512BW-NEXT:    retq
2084;
2085; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2086; AVX512F-32:       # BB#0:
2087; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2088; AVX512F-32-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2089; AVX512F-32-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0
2090; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2091; AVX512F-32-NEXT:    retl
2092  %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2093  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2094  %res2 = add <32 x i16> %res, %res1
2095  ret <32 x i16> %res2
2096}
2097
; Test: llvm.x86.avx512.mask.pmov.wb.512, which takes a <32 x i16> source, a
; <32 x i8> passthrough and an i32 mask, and returns <32 x i8>.
; Three calls are made and their results summed:
;   %res0 - all-ones mask (i32 -1),
;   %res1 - variable mask %x2 with passthrough %x1,
;   %res2 - variable mask %x2 with a zeroinitializer passthrough.
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpmovwb instructions writing ymm registers.
2098declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
2099
2100define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2101; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2102; AVX512BW:       ## BB#0:
2103; AVX512BW-NEXT:    kmovd %edi, %k1
2104; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm1 {%k1}
2105; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z}
2106; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
2107; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2108; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2109; AVX512BW-NEXT:    retq
2110;
2111; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2112; AVX512F-32:       # BB#0:
2113; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2114; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm1 {%k1}
2115; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z}
2116; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm0
2117; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2118; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2119; AVX512F-32-NEXT:    retl
2120    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2121    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2122    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2123    %res3 = add <32 x i8> %res0, %res1
2124    %res4 = add <32 x i8> %res3, %res2
2125    ret <32 x i8> %res4
2126}
2127
; Test: llvm.x86.avx512.mask.pmov.wb.mem.512, the void variant that takes an
; i8* destination, a <32 x i16> source and an i32 mask.
; The intrinsic is called twice on the same %ptr: first with an all-ones mask
; (i32 -1), then with the variable mask %x2. The checks expect an unmasked
; vpmovwb store followed by a masked ({%k1}) store to the same address.
2128declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2129
2130define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2131; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2132; AVX512BW:       ## BB#0:
2133; AVX512BW-NEXT:    kmovd %esi, %k1
2134; AVX512BW-NEXT:    vpmovwb %zmm0, (%rdi)
2135; AVX512BW-NEXT:    vpmovwb %zmm0, (%rdi) {%k1}
2136; AVX512BW-NEXT:    retq
2137;
2138; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2139; AVX512F-32:       # BB#0:
2140; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2141; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2142; AVX512F-32-NEXT:    vpmovwb %zmm0, (%eax)
2143; AVX512F-32-NEXT:    vpmovwb %zmm0, (%eax) {%k1}
2144; AVX512F-32-NEXT:    retl
2145    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2146    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2147    ret void
2148}
2149
; Test: llvm.x86.avx512.mask.pmovs.wb.512, which takes a <32 x i16> source, a
; <32 x i8> passthrough and an i32 mask, and returns <32 x i8>.
; Three calls are made and their results summed:
;   %res0 - all-ones mask (i32 -1),
;   %res1 - variable mask %x2 with passthrough %x1,
;   %res2 - variable mask %x2 with a zeroinitializer passthrough.
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpmovswb instructions writing ymm registers.
2150declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
2151
2152define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2153; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2154; AVX512BW:       ## BB#0:
2155; AVX512BW-NEXT:    kmovd %edi, %k1
2156; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm1 {%k1}
2157; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z}
2158; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
2159; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2160; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2161; AVX512BW-NEXT:    retq
2162;
2163; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2164; AVX512F-32:       # BB#0:
2165; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2166; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm1 {%k1}
2167; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z}
2168; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm0
2169; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2170; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2171; AVX512F-32-NEXT:    retl
2172    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2173    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2174    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2175    %res3 = add <32 x i8> %res0, %res1
2176    %res4 = add <32 x i8> %res3, %res2
2177    ret <32 x i8> %res4
2178}
2179
; Test: llvm.x86.avx512.mask.pmovs.wb.mem.512, the void variant that takes an
; i8* destination, a <32 x i16> source and an i32 mask.
; The intrinsic is called twice on the same %ptr: first with an all-ones mask
; (i32 -1), then with the variable mask %x2. The checks expect an unmasked
; vpmovswb store, then the mask move, then a masked ({%k1}) store to the same
; address.
2180declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2181
2182define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2183; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2184; AVX512BW:       ## BB#0:
2185; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi)
2186; AVX512BW-NEXT:    kmovd %esi, %k1
2187; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi) {%k1}
2188; AVX512BW-NEXT:    retq
2189;
2190; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2191; AVX512F-32:       # BB#0:
2192; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2193; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2194; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx)
2195; AVX512F-32-NEXT:    kmovd %eax, %k1
2196; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx) {%k1}
2197; AVX512F-32-NEXT:    retl
2198    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2199    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2200    ret void
2201}
2202
; Test: llvm.x86.avx512.mask.pmovus.wb.512, which takes a <32 x i16> source, a
; <32 x i8> passthrough and an i32 mask, and returns <32 x i8>.
; Three calls are made and their results summed:
;   %res0 - all-ones mask (i32 -1),
;   %res1 - variable mask %x2 with passthrough %x1,
;   %res2 - variable mask %x2 with a zeroinitializer passthrough.
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpmovuswb instructions writing ymm registers.
2203declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
2204
2205define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2206; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2207; AVX512BW:       ## BB#0:
2208; AVX512BW-NEXT:    kmovd %edi, %k1
2209; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1}
2210; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z}
2211; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm0
2212; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2213; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2214; AVX512BW-NEXT:    retq
2215;
2216; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2217; AVX512F-32:       # BB#0:
2218; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2219; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1}
2220; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z}
2221; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm0
2222; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
2223; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
2224; AVX512F-32-NEXT:    retl
2225    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2226    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2227    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2228    %res3 = add <32 x i8> %res0, %res1
2229    %res4 = add <32 x i8> %res3, %res2
2230    ret <32 x i8> %res4
2231}
2232
; Test: llvm.x86.avx512.mask.pmovus.wb.mem.512, the void variant that takes an
; i8* destination, a <32 x i16> source and an i32 mask.
; The intrinsic is called twice on the same %ptr: first with an all-ones mask
; (i32 -1), then with the variable mask %x2. The checks expect an unmasked
; vpmovuswb store, then the mask move, then a masked ({%k1}) store to the same
; address.
2233declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2234
2235define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2236; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2237; AVX512BW:       ## BB#0:
2238; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi)
2239; AVX512BW-NEXT:    kmovd %esi, %k1
2240; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1}
2241; AVX512BW-NEXT:    retq
2242;
2243; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2244; AVX512F-32:       # BB#0:
2245; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2246; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2247; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx)
2248; AVX512F-32-NEXT:    kmovd %eax, %k1
2249; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx) {%k1}
2250; AVX512F-32-NEXT:    retl
2251    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2252    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2253    ret void
2254}
2255
; Test: llvm.x86.avx512.mask.pmaddubs.w.512, which takes two <64 x i8>
; sources, a <32 x i16> passthrough %x2 and an i32 mask %x3, and returns
; <32 x i16>.
; Called once with the variable mask and once with an all-ones mask (i32 -1);
; the results are added so both calls stay live. The checks expect a masked
; vpmaddubsw ({%k1}) followed by an unmasked vpmaddubsw.
2256declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
2257
2258define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
2259; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2260; AVX512BW:       ## BB#0:
2261; AVX512BW-NEXT:    kmovd %edi, %k1
2262; AVX512BW-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2263; AVX512BW-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0
2264; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2265; AVX512BW-NEXT:    retq
2266;
2267; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2268; AVX512F-32:       # BB#0:
2269; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2270; AVX512F-32-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2271; AVX512F-32-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0
2272; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2273; AVX512F-32-NEXT:    retl
2274  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
2275  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
2276  %res2 = add <32 x i16> %res, %res1
2277  ret <32 x i16> %res2
2278}
2279
; Test: llvm.x86.avx512.mask.pmaddw.d.512, which takes two <32 x i16> sources,
; a <16 x i32> passthrough %x2 and an i16 mask %x3, and returns <16 x i32>.
; Called once with the variable mask and once with an all-ones mask (i16 -1);
; the results are added so both calls stay live. The checks expect the 16-bit
; mask to be moved with kmovw, then a masked vpmaddwd ({%k1}) followed by an
; unmasked vpmaddwd, combined with vpaddd.
2280declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
2281
2282define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
2283; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2284; AVX512BW:       ## BB#0:
2285; AVX512BW-NEXT:    kmovw %edi, %k1
2286; AVX512BW-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2287; AVX512BW-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0
2288; AVX512BW-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
2289; AVX512BW-NEXT:    retq
2290;
2291; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2292; AVX512F-32:       # BB#0:
2293; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
2294; AVX512F-32-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2295; AVX512F-32-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0
2296; AVX512F-32-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
2297; AVX512F-32-NEXT:    retl
2298  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
2299  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
2300  %res2 = add <16 x i32> %res, %res1
2301  ret <16 x i32> %res2
2302}
2303
; Test: llvm.x86.avx512.mask.dbpsadbw.512, which takes two <64 x i8> sources,
; an i32 operand (always the constant 2 here), a <32 x i16> passthrough and an
; i32 mask, and returns <32 x i16>.
; Three calls are made and their results summed:
;   %res  - variable mask %x4 with passthrough %x3,
;   %res1 - variable mask %x4 with a zeroinitializer passthrough,
;   %res2 - all-ones mask (i32 -1).
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vdbpsadbw instructions, each carrying the immediate $2.
2304declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
2305
2306define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
2307; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2308; AVX512BW:       ## BB#0:
2309; AVX512BW-NEXT:    kmovd %edi, %k1
2310; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2311; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2312; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2313; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2314; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2315; AVX512BW-NEXT:    retq
2316;
2317; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2318; AVX512F-32:       # BB#0:
2319; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2320; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2321; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2322; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2323; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2324; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2325; AVX512F-32-NEXT:    retl
2326  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
2327  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
2328  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
2329  %res3 = add <32 x i16> %res, %res1
2330  %res4 = add <32 x i16> %res3, %res2
2331  ret <32 x i16> %res4
2332}
2333
; Test: llvm.x86.avx512.psad.bw.512, which takes two <64 x i8> sources and
; returns <8 x i64>. Despite "mask" in the test name, the declared intrinsic
; has no mask or passthrough operand.
; The intrinsic is called with (%x0, %x1) and with (%x0, %x2) and the two
; results are added. The checks expect two vpsadbw instructions combined with
; vpaddq.
2334declare  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
2335
2336define  <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
2337; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2338; AVX512BW:       ## BB#0:
2339; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1
2340; AVX512BW-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0
2341; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2342; AVX512BW-NEXT:    retq
2343;
2344; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2345; AVX512F-32:       # BB#0:
2346; AVX512F-32-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1
2347; AVX512F-32-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0
2348; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
2349; AVX512F-32-NEXT:    retl
2350  %res = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
2351  %res1 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
2352  %res2 = add  <8 x i64> %res, %res1
2353  ret  <8 x i64> %res2
2354}
2355
; Test: llvm.x86.avx512.kunpck.wd, which takes two i32 operands and returns
; an i32.
; The checks expect both scalar operands to be moved into mask registers with
; kmovd, combined with kunpckwd, and the result moved back to %eax.
2356declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
2357
2358define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
2359; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
2360; AVX512BW:       ## BB#0:
2361; AVX512BW-NEXT:    kmovd %edi, %k0
2362; AVX512BW-NEXT:    kmovd %esi, %k1
2363; AVX512BW-NEXT:    kunpckwd %k1, %k0, %k0
2364; AVX512BW-NEXT:    kmovd %k0, %eax
2365; AVX512BW-NEXT:    retq
2366;
2367; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
2368; AVX512F-32:       # BB#0:
2369; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2370; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2371; AVX512F-32-NEXT:    kunpckwd %k1, %k0, %k0
2372; AVX512F-32-NEXT:    kmovd %k0, %eax
2373; AVX512F-32-NEXT:    retl
2374  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
2375  ret i32 %res
2376}
2377
; Test: llvm.x86.avx512.kunpck.dq, which takes two i64 operands and returns
; an i64.
; NOTE(review): the test name ends in "qd" while the intrinsic is "dq"; the
; name is left as is because the label checks below depend on it.
; On the 64-bit target the checks expect kmovq moves around kunpckdq. On the
; 32-bit target they expect two 32-bit kmovd stack loads, kunpckdq, and the
; 64-bit result stored to the stack with kmovq and reloaded into %eax/%edx.
2378declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
2379
2380define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
2381; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
2382; AVX512BW:       ## BB#0:
2383; AVX512BW-NEXT:    kmovq %rdi, %k0
2384; AVX512BW-NEXT:    kmovq %rsi, %k1
2385; AVX512BW-NEXT:    kunpckdq %k1, %k0, %k0
2386; AVX512BW-NEXT:    kmovq %k0, %rax
2387; AVX512BW-NEXT:    retq
2388;
2389; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
2390; AVX512F-32:       # BB#0:
2391; AVX512F-32-NEXT:    subl $12, %esp
2392; AVX512F-32-NEXT:  .Ltmp4:
2393; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
2394; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2395; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2396; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k0
2397; AVX512F-32-NEXT:    kmovq %k0, (%esp)
2398; AVX512F-32-NEXT:    movl (%esp), %eax
2399; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
2400; AVX512F-32-NEXT:    addl $12, %esp
2401; AVX512F-32-NEXT:    retl
2402  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
2403  ret i64 %res
2404}
2405
; Test: llvm.x86.avx512.cvtb2mask.512, which takes a <64 x i8> vector and
; returns an i64.
; The checks expect vpmovb2m into a mask register. On the 64-bit target the
; result is moved to %rax with kmovq; on the 32-bit target it is stored to the
; stack with kmovq and reloaded as two 32-bit halves into %eax/%edx.
2406declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
2407
2408define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
2409; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
2410; AVX512BW:       ## BB#0:
2411; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
2412; AVX512BW-NEXT:    kmovq %k0, %rax
2413; AVX512BW-NEXT:    retq
2414;
2415; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
2416; AVX512F-32:       # BB#0:
2417; AVX512F-32-NEXT:    subl $12, %esp
2418; AVX512F-32-NEXT:  .Ltmp5:
2419; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
2420; AVX512F-32-NEXT:    vpmovb2m %zmm0, %k0
2421; AVX512F-32-NEXT:    kmovq %k0, (%esp)
2422; AVX512F-32-NEXT:    movl (%esp), %eax
2423; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
2424; AVX512F-32-NEXT:    addl $12, %esp
2425; AVX512F-32-NEXT:    retl
2426    %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
2427    ret i64 %res
2428}
2429
; Test: llvm.x86.avx512.cvtw2mask.512, which takes a <32 x i16> vector and
; returns an i32.
; The checks expect vpmovw2m into a mask register followed by kmovd of that
; mask into %eax on both targets.
2430declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
2431
2432define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
2433; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
2434; AVX512BW:       ## BB#0:
2435; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
2436; AVX512BW-NEXT:    kmovd %k0, %eax
2437; AVX512BW-NEXT:    retq
2438;
2439; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
2440; AVX512F-32:       # BB#0:
2441; AVX512F-32-NEXT:    vpmovw2m %zmm0, %k0
2442; AVX512F-32-NEXT:    kmovd %k0, %eax
2443; AVX512F-32-NEXT:    retl
2444    %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
2445    ret i32 %res
2446}
2447
; Test: llvm.x86.avx512.cvtmask2b.512, which takes an i64 and returns a
; <64 x i8> vector.
; On the 64-bit target the checks expect kmovq of the argument into a mask
; register followed by vpmovm2b. On the 32-bit target they expect the 64-bit
; mask to be assembled from two 32-bit stack loads with kunpckdq first.
2448declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
2449
2450define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) {
2451; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512:
2452; AVX512BW:       ## BB#0:
2453; AVX512BW-NEXT:    kmovq %rdi, %k0
2454; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
2455; AVX512BW-NEXT:    retq
2456;
2457; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512:
2458; AVX512F-32:       # BB#0:
2459; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2460; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2461; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k0
2462; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm0
2463; AVX512F-32-NEXT:    retl
2464  %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
2465  ret <64 x i8> %res
2466}
2467
; Test: llvm.x86.avx512.cvtmask2w.512, which takes an i32 and returns a
; <32 x i16> vector.
; The checks expect kmovd of the argument into a mask register followed by
; vpmovm2w on both targets.
2468declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
2469
2470define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
2471; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512:
2472; AVX512BW:       ## BB#0:
2473; AVX512BW-NEXT:    kmovd %edi, %k0
2474; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
2475; AVX512BW-NEXT:    retq
2476;
2477; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512:
2478; AVX512F-32:       # BB#0:
2479; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2480; AVX512F-32-NEXT:    vpmovm2w %k0, %zmm0
2481; AVX512F-32-NEXT:    retl
2482  %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
2483  ret <32 x i16> %res
2484}
2485
; Test: llvm.x86.avx512.mask.psrl.w.512, which takes a <32 x i16> source, an
; <8 x i16> second operand, a <32 x i16> passthrough and an i32 mask.
; Three calls are made and their results summed:
;   %res  - variable mask %x3 with passthrough %x2,
;   %res1 - all-ones mask (i32 -1),
;   %res2 - variable mask %x3 with a zeroinitializer passthrough.
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpsrlw instructions that take the second operand from an xmm register.
2486declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
2487
2488define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2489; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_w_512:
2490; AVX512BW:       ## BB#0:
2491; AVX512BW-NEXT:    kmovd %edi, %k1
2492; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
2493; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
2494; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
2495; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2496; AVX512BW-NEXT:    vpaddw %zmm3, %zmm0, %zmm0
2497; AVX512BW-NEXT:    retq
2498;
2499; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
2500; AVX512F-32:       # BB#0:
2501; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2502; AVX512F-32-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
2503; AVX512F-32-NEXT:    vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
2504; AVX512F-32-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
2505; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
2506; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm0, %zmm0
2507; AVX512F-32-NEXT:    retl
2508  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
2509  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
2510  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2511  %res3 = add <32 x i16> %res, %res1
2512  %res4 = add <32 x i16> %res3, %res2
2513  ret <32 x i16> %res4
2514}
2515
; Test: llvm.x86.avx512.mask.psrl.wi.512, which takes a <32 x i16> source, an
; i32 second operand, a <32 x i16> passthrough and an i32 mask.
; Every call passes the constant i32 3 as the second operand; the function
; parameter %x1 is not referenced in the body.
; Three calls are made and their results summed:
;   %res  - variable mask %x3 with passthrough %x2,
;   %res1 - all-ones mask (i32 -1),
;   %res2 - variable mask %x3 with a zeroinitializer passthrough.
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpsrlw instructions with the immediate $3.
2516declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32)
2517
2518define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
2519; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
2520; AVX512BW:       ## BB#0:
2521; AVX512BW-NEXT:    kmovd %esi, %k1
2522; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1}
2523; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
2524; AVX512BW-NEXT:    vpsrlw $3, %zmm0, %zmm0
2525; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2526; AVX512BW-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
2527; AVX512BW-NEXT:    retq
2528;
2529; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
2530; AVX512F-32:       # BB#0:
2531; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2532; AVX512F-32-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1}
2533; AVX512F-32-NEXT:    vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
2534; AVX512F-32-NEXT:    vpsrlw $3, %zmm0, %zmm0
2535; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2536; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
2537; AVX512F-32-NEXT:    retl
2538  %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
2539  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
2540  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
2541  %res3 = add <32 x i16> %res, %res1
2542  %res4 = add <32 x i16> %res3, %res2
2543  ret <32 x i16> %res4
2544}
2545
; Test: llvm.x86.avx512.mask.psrlv32hi, which takes two <32 x i16> operands,
; a <32 x i16> passthrough and an i32 mask.
; Three calls are made and their results summed:
;   %res  - variable mask %x3 with passthrough %x2,
;   %res1 - variable mask %x3 with a zeroinitializer passthrough,
;   %res2 - all-ones mask (i32 -1).
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpsrlvw instructions with both operands in zmm registers.
2546declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2547
2548define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2549; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi:
2550; AVX512BW:       ## BB#0:
2551; AVX512BW-NEXT:    kmovd %edi, %k1
2552; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
2553; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2554; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
2555; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2556; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2557; AVX512BW-NEXT:    retq
2558;
2559; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
2560; AVX512F-32:       # BB#0:
2561; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2562; AVX512F-32-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
2563; AVX512F-32-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2564; AVX512F-32-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
2565; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2566; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2567; AVX512F-32-NEXT:    retl
2568  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2569  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2570  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2571  %res3 = add <32 x i16> %res, %res1
2572  %res4 = add <32 x i16> %res3, %res2
2573  ret <32 x i16> %res4
2574}
2575
; Test: llvm.x86.avx512.mask.psra.w.512, which takes a <32 x i16> source, an
; <8 x i16> second operand, a <32 x i16> passthrough and an i32 mask.
; Three calls are made and their results summed:
;   %res  - variable mask %x3 with passthrough %x2,
;   %res1 - variable mask %x3 with a zeroinitializer passthrough,
;   %res2 - all-ones mask (i32 -1).
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpsraw instructions that take the second operand from an xmm register.
2576declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
2577
2578define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2579; AVX512BW-LABEL: test_int_x86_avx512_mask_psra_w_512:
2580; AVX512BW:       ## BB#0:
2581; AVX512BW-NEXT:    kmovd %edi, %k1
2582; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1}
2583; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
2584; AVX512BW-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
2585; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2586; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2587; AVX512BW-NEXT:    retq
2588;
2589; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
2590; AVX512F-32:       # BB#0:
2591; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2592; AVX512F-32-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1}
2593; AVX512F-32-NEXT:    vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
2594; AVX512F-32-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
2595; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2596; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2597; AVX512F-32-NEXT:    retl
2598  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
2599  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2600  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
2601  %res3 = add <32 x i16> %res, %res1
2602  %res4 = add <32 x i16> %res3, %res2
2603  ret <32 x i16> %res4
2604}
2605
; Test: llvm.x86.avx512.mask.psra.wi.512, which takes a <32 x i16> source, an
; i32 second operand, a <32 x i16> passthrough and an i32 mask.
; Every call passes the constant i32 3 as the second operand; the function
; parameter %x1 is not referenced in the body.
; Three calls are made and their results summed:
;   %res  - variable mask %x3 with passthrough %x2,
;   %res1 - variable mask %x3 with a zeroinitializer passthrough,
;   %res2 - all-ones mask (i32 -1).
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpsraw instructions with the immediate $3.
2606declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32)
2607
2608define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
2609; AVX512BW-LABEL: test_int_x86_avx512_mask_psra_wi_512:
2610; AVX512BW:       ## BB#0:
2611; AVX512BW-NEXT:    kmovd %esi, %k1
2612; AVX512BW-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1}
2613; AVX512BW-NEXT:    vpsraw $3, %zmm0, %zmm2 {%k1} {z}
2614; AVX512BW-NEXT:    vpsraw $3, %zmm0, %zmm0
2615; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2616; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2617; AVX512BW-NEXT:    retq
2618;
2619; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
2620; AVX512F-32:       # BB#0:
2621; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2622; AVX512F-32-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1}
2623; AVX512F-32-NEXT:    vpsraw $3, %zmm0, %zmm2 {%k1} {z}
2624; AVX512F-32-NEXT:    vpsraw $3, %zmm0, %zmm0
2625; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2626; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2627; AVX512F-32-NEXT:    retl
2628  %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
2629  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
2630  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
2631  %res3 = add <32 x i16> %res, %res1
2632  %res4 = add <32 x i16> %res3, %res2
2633  ret <32 x i16> %res4
2634}
2635
; Test: llvm.x86.avx512.mask.psrav32.hi, which takes two <32 x i16> operands,
; a <32 x i16> passthrough and an i32 mask.
; Three calls are made and their results summed:
;   %res  - variable mask %x3 with passthrough %x2,
;   %res1 - variable mask %x3 with a zeroinitializer passthrough,
;   %res2 - all-ones mask (i32 -1).
; The checks expect merge-masked ({%k1}), zero-masked ({%k1} {z}) and unmasked
; vpsravw instructions with both operands in zmm registers.
2636declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2637
2638define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2639; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi:
2640; AVX512BW:       ## BB#0:
2641; AVX512BW-NEXT:    kmovd %edi, %k1
2642; AVX512BW-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1}
2643; AVX512BW-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
2644; AVX512BW-NEXT:    vpsravw %zmm1, %zmm0, %zmm0
2645; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2646; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2647; AVX512BW-NEXT:    retq
2648;
2649; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
2650; AVX512F-32:       # BB#0:
2651; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2652; AVX512F-32-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1}
2653; AVX512F-32-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
2654; AVX512F-32-NEXT:    vpsravw %zmm1, %zmm0, %zmm0
2655; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2656; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2657; AVX512F-32-NEXT:    retl
2658  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2659  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2660  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2661  %res3 = add <32 x i16> %res, %res1
2662  %res4 = add <32 x i16> %res3, %res2
2663  ret <32 x i16> %res4
2664}
2665
; Test: llvm.x86.avx512.mask.psrav32.hi called with two constant <32 x i16>
; vectors, a zeroinitializer passthrough and an all-ones mask (i32 -1). The
; function parameters %x0..%x3 are not referenced in the body.
; The checks expect the call not to be folded away: the first constant is
; loaded with vmovdqu16 (negative i16 elements are printed as unsigned values,
; e.g. -12 appears as 65524) and the second constant is a memory operand of
; an unmasked vpsravw.
2666define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2667; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
2668; AVX512BW:       ## BB#0:
2669; AVX512BW-NEXT:    vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
2670; AVX512BW-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0
2671; AVX512BW-NEXT:    retq
2672;
2673; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
2674; AVX512F-32:       # BB#0:
2675; AVX512F-32-NEXT:    vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
2676; AVX512F-32-NEXT:    vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
2677; AVX512F-32-NEXT:    retl
2678  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
2679                                                          <32 x i16> <i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49>,
2680                                                          <32 x i16> zeroinitializer, i32 -1)
2681  ret <32 x i16> %res
2682}
2683
; Test for @llvm.x86.avx512.mask.psll.w.512, whose shift operand is an
; <8 x i16> vector (an xmm register in the expected code).
; The intrinsic is called three times - with %x2 and mask %x3, with
; zeroinitializer and mask %x3, and with mask -1 - and the three results are
; summed. The checks expect one vpsllw with {%k1}, one with {%k1} {z} and one
; without a mask. The mask reaches k1 from edi on the 64-bit run and from the
; stack on the 32-bit run.
2684declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
2685
2686define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2687; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_w_512:
2688; AVX512BW:       ## BB#0:
2689; AVX512BW-NEXT:    kmovd %edi, %k1
2690; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1}
2691; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
2692; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
2693; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2694; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2695; AVX512BW-NEXT:    retq
2696;
2697; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
2698; AVX512F-32:       # BB#0:
2699; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2700; AVX512F-32-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1}
2701; AVX512F-32-NEXT:    vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
2702; AVX512F-32-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
2703; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2704; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2705; AVX512F-32-NEXT:    retl
2706  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
2707  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2708  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
2709  %res3 = add <32 x i16> %res, %res1
2710  %res4 = add <32 x i16> %res3, %res2
2711  ret <32 x i16> %res4
2712}
2713
; Test for @llvm.x86.avx512.mask.psll.wi.512, whose shift operand is an i32.
; Every call passes the literal 3 (the %x1 argument is not used), so the
; checks expect the immediate form vpsllw $3.
; The intrinsic is called with %x2 and mask %x3, with zeroinitializer and mask
; %x3, and with mask -1, and the three results are summed. The checks expect
; a {%k1} form, a {%k1} {z} form and an unmasked form. On the 64-bit run the
; mask is taken from esi, on the 32-bit run from the stack.
2714declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32)
2715
2716define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
2717; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_wi_512:
2718; AVX512BW:       ## BB#0:
2719; AVX512BW-NEXT:    kmovd %esi, %k1
2720; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1}
2721; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm2 {%k1} {z}
2722; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
2723; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2724; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2725; AVX512BW-NEXT:    retq
2726;
2727; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
2728; AVX512F-32:       # BB#0:
2729; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2730; AVX512F-32-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1}
2731; AVX512F-32-NEXT:    vpsllw $3, %zmm0, %zmm2 {%k1} {z}
2732; AVX512F-32-NEXT:    vpsllw $3, %zmm0, %zmm0
2733; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2734; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2735; AVX512F-32-NEXT:    retl
2736  %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
2737  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
2738  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
2739  %res3 = add <32 x i16> %res, %res1
2740  %res4 = add <32 x i16> %res3, %res2
2741  ret <32 x i16> %res4
2742}
2743
; Test for @llvm.x86.avx512.mask.psllv32hi, where both the data and the shift
; operand are <32 x i16> vectors.
; The intrinsic is called with %x2 and mask %x3, with zeroinitializer and mask
; %x3, and with mask -1, and the three results are summed. The checks expect
; vpsllvw in a {%k1} form, a {%k1} {z} form and an unmasked form, followed by
; two vpaddw.
2744declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2745
2746define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2747; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi:
2748; AVX512BW:       ## BB#0:
2749; AVX512BW-NEXT:    kmovd %edi, %k1
2750; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
2751; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2752; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
2753; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2754; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2755; AVX512BW-NEXT:    retq
2756;
2757; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
2758; AVX512F-32:       # BB#0:
2759; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2760; AVX512F-32-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
2761; AVX512F-32-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2762; AVX512F-32-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
2763; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2764; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2765; AVX512F-32-NEXT:    retl
2766  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2767  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2768  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2769  %res3 = add <32 x i16> %res, %res1
2770  %res4 = add <32 x i16> %res3, %res2
2771  ret <32 x i16> %res4
2772}
2773
; Test for @llvm.x86.avx512.mask.pmovzxb.w.512, which takes a <32 x i8>
; source, a <32 x i16> second operand and an i32 mask.
; The intrinsic is called with %x1 and mask %x2, with zeroinitializer and mask
; %x2, and with mask -1, and the three results are summed. The checks expect
; vpmovzxbw in a {%k1} form, a {%k1} {z} form and an unmasked form. The
; trailing shuffle comment matched on each vpmovzxbw line lists every source
; byte of ymm0 followed by a zero.
2774declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
2775
2776define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
2777; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
2778; AVX512BW:       ## BB#0:
2779; AVX512BW-NEXT:    kmovd %edi, %k1
2780; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2781; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2782; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2783; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2784; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2785; AVX512BW-NEXT:    retq
2786;
2787; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
2788; AVX512F-32:       # BB#0:
2789; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2790; AVX512F-32-NEXT:    vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2791; AVX512F-32-NEXT:    vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2792; AVX512F-32-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2793; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2794; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2795; AVX512F-32-NEXT:    retl
2796  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
2797  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
2798  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
2799  %res3 = add <32 x i16> %res, %res1
2800  %res4 = add <32 x i16> %res3, %res2
2801  ret <32 x i16> %res4
2802}
2803
; Test for @llvm.x86.avx512.mask.pmovsxb.w.512, which takes a <32 x i8>
; source, a <32 x i16> second operand and an i32 mask.
; The intrinsic is called with %x1 and mask %x2, with zeroinitializer and mask
; %x2, and with mask -1, and the three results are summed. The checks expect
; vpmovsxbw from ymm0 in a {%k1} form, a {%k1} {z} form and an unmasked form,
; followed by two vpaddw.
2804declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
2805
2806define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
2807; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
2808; AVX512BW:       ## BB#0:
2809; AVX512BW-NEXT:    kmovd %edi, %k1
2810; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1}
2811; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm2 {%k1} {z}
2812; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
2813; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2814; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2815; AVX512BW-NEXT:    retq
2816;
2817; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
2818; AVX512F-32:       # BB#0:
2819; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2820; AVX512F-32-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1}
2821; AVX512F-32-NEXT:    vpmovsxbw %ymm0, %zmm2 {%k1} {z}
2822; AVX512F-32-NEXT:    vpmovsxbw %ymm0, %zmm0
2823; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
2824; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2825; AVX512F-32-NEXT:    retl
2826  %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
2827  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
2828  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
2829  %res3 = add <32 x i16> %res, %res1
2830  %res4 = add <32 x i16> %res3, %res2
2831  ret <32 x i16> %res4
2832}
2833
; Test for @llvm.x86.avx512.mask.permvar.hi.512 on <32 x i16> operands.
; The intrinsic is called with %x2 and mask %x3, with zeroinitializer and mask
; %x3, and with mask -1, and the three results are summed. The checks expect
; vpermw in a {%k1} form, a {%k1} {z} form and an unmasked form. Note the
; expected operand order - zmm0 (holding %x0) is printed first and zmm1
; (holding %x1) second in every vpermw.
2834declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2835
2836define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2837; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2838; AVX512BW:       ## BB#0:
2839; AVX512BW-NEXT:    kmovd %edi, %k1
2840; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1}
2841; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
2842; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
2843; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2844; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2845; AVX512BW-NEXT:    retq
2846;
2847; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2848; AVX512F-32:       # BB#0:
2849; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2850; AVX512F-32-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1}
2851; AVX512F-32-NEXT:    vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
2852; AVX512F-32-NEXT:    vpermw %zmm0, %zmm1, %zmm0
2853; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
2854; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
2855; AVX512F-32-NEXT:    retl
2856  %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2857  %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2858  %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2859  %res3 = add <32 x i16> %res, %res1
2860  %res4 = add <32 x i16> %res3, %res2
2861  ret <32 x i16> %res4
2862}
2863
; Test for @llvm.x86.avx512.ptestm.b.512, which takes two <64 x i8> vectors
; and an i64 mask and returns an i64.
; The intrinsic is called once with mask %x2 and once with mask -1, and the
; two i64 results are added. The checks expect one vptestmb with {%k1} and one
; without a mask.
; On the 64-bit run the mask is moved from rdi with kmovq. On the 32-bit run
; the 64-bit mask is assembled from two 32-bit stack loads with kunpckdq, both
; results are stored to the stack with kmovq, and the sum is formed in two
; 32-bit halves (addl then adcxl) in eax and edx.
2864declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
2865
2866define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2867; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512:
2868; AVX512BW:       ## BB#0:
2869; AVX512BW-NEXT:    kmovq %rdi, %k1
2870; AVX512BW-NEXT:    vptestmb %zmm1, %zmm0, %k0 {%k1}
2871; AVX512BW-NEXT:    kmovq %k0, %rcx
2872; AVX512BW-NEXT:    vptestmb %zmm1, %zmm0, %k0
2873; AVX512BW-NEXT:    kmovq %k0, %rax
2874; AVX512BW-NEXT:    addq %rcx, %rax
2875; AVX512BW-NEXT:    retq
2876;
2877; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
2878; AVX512F-32:       # BB#0:
2879; AVX512F-32-NEXT:    subl $20, %esp
2880; AVX512F-32-NEXT:  .Ltmp6:
2881; AVX512F-32-NEXT:    .cfi_def_cfa_offset 24
2882; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2883; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2884; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2885; AVX512F-32-NEXT:    vptestmb %zmm1, %zmm0, %k0 {%k1}
2886; AVX512F-32-NEXT:    kmovq %k0, (%esp)
2887; AVX512F-32-NEXT:    vptestmb %zmm1, %zmm0, %k0
2888; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
2889; AVX512F-32-NEXT:    movl (%esp), %eax
2890; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
2891; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
2892; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
2893; AVX512F-32-NEXT:    addl $20, %esp
2894; AVX512F-32-NEXT:    retl
2895  %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2896  %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2897  %res2 = add i64 %res, %res1
2898  ret i64 %res2
2899}
2900
; Test for @llvm.x86.avx512.ptestm.w.512, which takes two <32 x i16> vectors
; and an i32 mask and returns an i32.
; The intrinsic is called once with mask %x2 and once with mask -1, and the
; two i32 results are added. The checks expect one vptestmw with {%k1} and one
; without a mask, each result moved out of k0 with kmovd, then a single addl.
; The mask comes from edi on the 64-bit run and from the stack on the 32-bit
; run.
2901declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
2902
2903define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2904; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512:
2905; AVX512BW:       ## BB#0:
2906; AVX512BW-NEXT:    kmovd %edi, %k1
2907; AVX512BW-NEXT:    vptestmw %zmm1, %zmm0, %k0 {%k1}
2908; AVX512BW-NEXT:    kmovd %k0, %ecx
2909; AVX512BW-NEXT:    vptestmw %zmm1, %zmm0, %k0
2910; AVX512BW-NEXT:    kmovd %k0, %eax
2911; AVX512BW-NEXT:    addl %ecx, %eax
2912; AVX512BW-NEXT:    retq
2913;
2914; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512:
2915; AVX512F-32:       # BB#0:
2916; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2917; AVX512F-32-NEXT:    vptestmw %zmm1, %zmm0, %k0 {%k1}
2918; AVX512F-32-NEXT:    kmovd %k0, %ecx
2919; AVX512F-32-NEXT:    vptestmw %zmm1, %zmm0, %k0
2920; AVX512F-32-NEXT:    kmovd %k0, %eax
2921; AVX512F-32-NEXT:    addl %ecx, %eax
2922; AVX512F-32-NEXT:    retl
2923  %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2924  %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2925  %res2 = add i32 %res, %res1
2926  ret i32 %res2
2927}
2928
; Test for @llvm.x86.avx512.ptestnm.b.512, which takes two <64 x i8> vectors
; and an i64 mask and returns an i64.
; The intrinsic is called once with mask %x2 and once with mask -1, and the
; two i64 results are added. The checks expect one vptestnmb with {%k1} and
; one without a mask.
; On the 64-bit run the mask is moved from rdi with kmovq. On the 32-bit run
; the 64-bit mask is assembled from two 32-bit stack loads with kunpckdq, both
; results are stored to the stack with kmovq, and the sum is formed in two
; 32-bit halves (addl then adcxl) in eax and edx.
2929declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64)
2930
2931define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2932; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512:
2933; AVX512BW:       ## BB#0:
2934; AVX512BW-NEXT:    kmovq %rdi, %k1
2935; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm0, %k0 {%k1}
2936; AVX512BW-NEXT:    kmovq %k0, %rcx
2937; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm0, %k0
2938; AVX512BW-NEXT:    kmovq %k0, %rax
2939; AVX512BW-NEXT:    addq %rcx, %rax
2940; AVX512BW-NEXT:    retq
2941;
2942; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
2943; AVX512F-32:       # BB#0:
2944; AVX512F-32-NEXT:    subl $20, %esp
2945; AVX512F-32-NEXT:  .Ltmp7:
2946; AVX512F-32-NEXT:    .cfi_def_cfa_offset 24
2947; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
2948; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2949; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
2950; AVX512F-32-NEXT:    vptestnmb %zmm1, %zmm0, %k0 {%k1}
2951; AVX512F-32-NEXT:    kmovq %k0, (%esp)
2952; AVX512F-32-NEXT:    vptestnmb %zmm1, %zmm0, %k0
2953; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
2954; AVX512F-32-NEXT:    movl (%esp), %eax
2955; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
2956; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
2957; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
2958; AVX512F-32-NEXT:    addl $20, %esp
2959; AVX512F-32-NEXT:    retl
2960  %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2961  %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2962  %res2 = add i64 %res, %res1
2963  ret i64 %res2
2964}
2965
; Test for @llvm.x86.avx512.ptestnm.w.512, which takes two <32 x i16> vectors
; and an i32 mask and returns an i32.
; The intrinsic is called once with mask %x2 and once with mask -1, and the
; two i32 results are added. The checks expect one vptestnmw with {%k1} and
; one without a mask, each result moved out of k0 with kmovd, then a single
; addl. The mask comes from edi on the 64-bit run and from the stack on the
; 32-bit run.
2966declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32)
2967
2968define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2969; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512:
2970; AVX512BW:       ## BB#0:
2971; AVX512BW-NEXT:    kmovd %edi, %k1
2972; AVX512BW-NEXT:    vptestnmw %zmm1, %zmm0, %k0 {%k1}
2973; AVX512BW-NEXT:    kmovd %k0, %ecx
2974; AVX512BW-NEXT:    vptestnmw %zmm1, %zmm0, %k0
2975; AVX512BW-NEXT:    kmovd %k0, %eax
2976; AVX512BW-NEXT:    addl %ecx, %eax
2977; AVX512BW-NEXT:    retq
2978;
2979; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512:
2980; AVX512F-32:       # BB#0:
2981; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
2982; AVX512F-32-NEXT:    vptestnmw %zmm1, %zmm0, %k0 {%k1}
2983; AVX512F-32-NEXT:    kmovd %k0, %ecx
2984; AVX512F-32-NEXT:    vptestnmw %zmm1, %zmm0, %k0
2985; AVX512F-32-NEXT:    kmovd %k0, %eax
2986; AVX512F-32-NEXT:    addl %ecx, %eax
2987; AVX512F-32-NEXT:    retl
2988  %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2989  %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2990  %res2 = add i32 %res, %res1
2991  ret i32 %res2
2992}
2993
; Test for @llvm.x86.avx512.mask.pbroadcast.b.gpr.512, which takes an i8
; scalar, a <64 x i8> second operand and an i64 mask.
; The intrinsic is called with %x1 and mask -1, with %x1 and %mask, and with
; zeroinitializer and %mask, and the three results are summed. The checks
; expect vpbroadcastb from a byte register in an unmasked form, a {%k1} form
; and a {%k1} {z} form, followed by two vpaddb.
; On the 64-bit run the scalar is in dil and the mask is moved from rsi with
; kmovq. On the 32-bit run the scalar is loaded into al and the 64-bit mask is
; assembled from two 32-bit stack loads with kunpckdq. The expected order of
; the {%k1} and {%k1} {z} broadcasts differs between the two runs.
2994declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
2995
2996define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
2997; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
2998; AVX512BW:       ## BB#0:
2999; AVX512BW-NEXT:    kmovq %rsi, %k1
3000; AVX512BW-NEXT:    vpbroadcastb %dil, %zmm0 {%k1}
3001; AVX512BW-NEXT:    vpbroadcastb %dil, %zmm1 {%k1} {z}
3002; AVX512BW-NEXT:    vpbroadcastb %dil, %zmm2
3003; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
3004; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
3005; AVX512BW-NEXT:    retq
3006;
3007; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
3008; AVX512F-32:       # BB#0:
3009; AVX512F-32-NEXT:    movb {{[0-9]+}}(%esp), %al
3010; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
3011; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
3012; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
3013; AVX512F-32-NEXT:    vpbroadcastb %al, %zmm1 {%k1} {z}
3014; AVX512F-32-NEXT:    vpbroadcastb %al, %zmm0 {%k1}
3015; AVX512F-32-NEXT:    vpbroadcastb %al, %zmm2
3016; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
3017; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
3018; AVX512F-32-NEXT:    retl
3019  %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
3020  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
3021  %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
3022  %res3 = add <64 x i8> %res, %res1
3023  %res4 = add <64 x i8> %res2, %res3
3024  ret <64 x i8> %res4
3025}
3026
; Test for @llvm.x86.avx512.mask.pbroadcast.w.gpr.512, which takes an i16
; scalar, a <32 x i16> second operand and an i32 mask.
; The intrinsic is called with %x1 and mask -1, with %x1 and %mask, and with
; zeroinitializer and %mask, and the three results are summed. The checks
; expect vpbroadcastw from a 16-bit register in a {%k1} form, a {%k1} {z} form
; and an unmasked form, followed by two vpaddw.
; On the 64-bit run the scalar is in di and the mask is moved from esi. On the
; 32-bit run the scalar is loaded with movzwl into eax (used as ax) and the
; mask is loaded from the stack.
3027declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
3028
3029define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
3030; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
3031; AVX512BW:       ## BB#0:
3032; AVX512BW-NEXT:    kmovd %esi, %k1
3033; AVX512BW-NEXT:    vpbroadcastw %di, %zmm0 {%k1}
3034; AVX512BW-NEXT:    vpbroadcastw %di, %zmm1 {%k1} {z}
3035; AVX512BW-NEXT:    vpbroadcastw %di, %zmm2
3036; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
3037; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
3038; AVX512BW-NEXT:    retq
3039;
3040; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
3041; AVX512F-32:       # BB#0:
3042; AVX512F-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
3043; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
3044; AVX512F-32-NEXT:    vpbroadcastw %ax, %zmm0 {%k1}
3045; AVX512F-32-NEXT:    vpbroadcastw %ax, %zmm1 {%k1} {z}
3046; AVX512F-32-NEXT:    vpbroadcastw %ax, %zmm2
3047; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
3048; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
3049; AVX512F-32-NEXT:    retl
3050  %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
3051  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
3052  %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
3053  %res3 = add <32 x i16> %res, %res1
3054  %res4 = add <32 x i16> %res2, %res3
3055  ret <32 x i16> %res4
3056}
3057