Lines Matching full:32

3 …wn-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
33 ; AVX512F-32-LABEL: test_cmp_b_512:
34 ; AVX512F-32: # BB#0:
35 ; AVX512F-32-NEXT: subl $68, %esp
36 ; AVX512F-32-NEXT: .Ltmp0:
37 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
38 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
39 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
40 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
41 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
42 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
43 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
44 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
45 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
46 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
47 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
48 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
49 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
50 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
51 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
52 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
53 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
54 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
55 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
56 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
57 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
58 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
59 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
60 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
61 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
62 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
63 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
64 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
65 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
66 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
67 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
68 ; AVX512F-32-NEXT: addl (%esp), %eax
69 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
70 ; AVX512F-32-NEXT: addl $68, %esp
71 ; AVX512F-32-NEXT: retl
119 ; AVX512F-32-LABEL: test_mask_cmp_b_512:
120 ; AVX512F-32: # BB#0:
121 ; AVX512F-32-NEXT: subl $68, %esp
122 ; AVX512F-32-NEXT: .Ltmp1:
123 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
124 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
125 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
126 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
127 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
128 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
129 ; AVX512F-32-NEXT: movl (%esp), %eax
130 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
131 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
132 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
133 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
134 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
135 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
136 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
137 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
138 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
139 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
140 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
141 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
142 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
143 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
144 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
145 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
146 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
147 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
148 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
149 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
150 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
151 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
152 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
153 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
154 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
155 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
156 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
157 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
158 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
159 ; AVX512F-32-NEXT: addl $68, %esp
160 ; AVX512F-32-NEXT: retl
209 ; AVX512F-32-LABEL: test_ucmp_b_512:
210 ; AVX512F-32: # BB#0:
211 ; AVX512F-32-NEXT: subl $68, %esp
212 ; AVX512F-32-NEXT: .Ltmp2:
213 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
214 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
215 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
216 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
217 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
218 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
219 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
220 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
221 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
222 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
223 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
224 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
225 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
226 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
227 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
228 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
229 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
230 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
231 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
232 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
233 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
234 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
235 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
236 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
237 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
238 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
239 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
240 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
241 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
242 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
243 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
244 ; AVX512F-32-NEXT: addl (%esp), %eax
245 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
246 ; AVX512F-32-NEXT: addl $68, %esp
247 ; AVX512F-32-NEXT: retl
295 ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
296 ; AVX512F-32: # BB#0:
297 ; AVX512F-32-NEXT: subl $68, %esp
298 ; AVX512F-32-NEXT: .Ltmp3:
299 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
300 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
301 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
302 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
303 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
304 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
305 ; AVX512F-32-NEXT: movl (%esp), %eax
306 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
307 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
308 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
309 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
310 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
311 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
312 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
313 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
314 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
315 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
316 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
317 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
318 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
319 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
320 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
321 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
322 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
323 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
324 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
325 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
326 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
327 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
328 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
329 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
330 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
331 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
332 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
333 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
334 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
335 ; AVX512F-32-NEXT: addl $68, %esp
336 ; AVX512F-32-NEXT: retl
357 define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
385 ; AVX512F-32-LABEL: test_cmp_w_512:
386 ; AVX512F-32: # BB#0:
387 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
388 ; AVX512F-32-NEXT: kmovd %k0, %eax
389 ; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0
390 ; AVX512F-32-NEXT: kmovd %k0, %ecx
391 ; AVX512F-32-NEXT: addl %eax, %ecx
392 ; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0
393 ; AVX512F-32-NEXT: kmovd %k0, %eax
394 ; AVX512F-32-NEXT: addl %ecx, %eax
395 ; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
396 ; AVX512F-32-NEXT: kmovd %k0, %ecx
397 ; AVX512F-32-NEXT: addl %eax, %ecx
398 ; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
399 ; AVX512F-32-NEXT: kmovd %k0, %eax
400 ; AVX512F-32-NEXT: addl %ecx, %eax
401 ; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
402 ; AVX512F-32-NEXT: kmovd %k0, %ecx
403 ; AVX512F-32-NEXT: addl %eax, %ecx
404 ; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
405 ; AVX512F-32-NEXT: kmovd %k0, %edx
406 ; AVX512F-32-NEXT: addl %ecx, %edx
407 ; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0
408 ; AVX512F-32-NEXT: kmovd %k0, %eax
409 ; AVX512F-32-NEXT: addl %edx, %eax
410 ; AVX512F-32-NEXT: retl
411 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
412 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
414 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
416 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
418 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
420 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
422 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
424 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
429 define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
458 ; AVX512F-32-LABEL: test_mask_cmp_w_512:
459 ; AVX512F-32: # BB#0:
460 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
461 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
462 ; AVX512F-32-NEXT: kmovd %k0, %eax
463 ; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
464 ; AVX512F-32-NEXT: kmovd %k0, %ecx
465 ; AVX512F-32-NEXT: addl %eax, %ecx
466 ; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
467 ; AVX512F-32-NEXT: kmovd %k0, %eax
468 ; AVX512F-32-NEXT: addl %ecx, %eax
469 ; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
470 ; AVX512F-32-NEXT: kmovd %k0, %ecx
471 ; AVX512F-32-NEXT: addl %eax, %ecx
472 ; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
473 ; AVX512F-32-NEXT: kmovd %k0, %eax
474 ; AVX512F-32-NEXT: addl %ecx, %eax
475 ; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
476 ; AVX512F-32-NEXT: kmovd %k0, %ecx
477 ; AVX512F-32-NEXT: addl %eax, %ecx
478 ; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
479 ; AVX512F-32-NEXT: kmovd %k0, %edx
480 ; AVX512F-32-NEXT: addl %ecx, %edx
481 ; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
482 ; AVX512F-32-NEXT: kmovd %k0, %eax
483 ; AVX512F-32-NEXT: addl %edx, %eax
484 ; AVX512F-32-NEXT: retl
485 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
486 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
488 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
490 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
492 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
494 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
496 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
498 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
503 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
505 define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
533 ; AVX512F-32-LABEL: test_ucmp_w_512:
534 ; AVX512F-32: # BB#0:
535 ; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0
536 ; AVX512F-32-NEXT: kmovd %k0, %eax
537 ; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
538 ; AVX512F-32-NEXT: kmovd %k0, %ecx
539 ; AVX512F-32-NEXT: addl %eax, %ecx
540 ; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
541 ; AVX512F-32-NEXT: kmovd %k0, %eax
542 ; AVX512F-32-NEXT: addl %ecx, %eax
543 ; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
544 ; AVX512F-32-NEXT: kmovd %k0, %ecx
545 ; AVX512F-32-NEXT: addl %eax, %ecx
546 ; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
547 ; AVX512F-32-NEXT: kmovd %k0, %eax
548 ; AVX512F-32-NEXT: addl %ecx, %eax
549 ; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
550 ; AVX512F-32-NEXT: kmovd %k0, %ecx
551 ; AVX512F-32-NEXT: addl %eax, %ecx
552 ; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
553 ; AVX512F-32-NEXT: kmovd %k0, %edx
554 ; AVX512F-32-NEXT: addl %ecx, %edx
555 ; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0
556 ; AVX512F-32-NEXT: kmovd %k0, %eax
557 ; AVX512F-32-NEXT: addl %edx, %eax
558 ; AVX512F-32-NEXT: retl
559 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
560 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
562 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
564 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
566 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
568 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
570 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
572 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
577 define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
606 ; AVX512F-32-LABEL: test_mask_ucmp_w_512:
607 ; AVX512F-32: # BB#0:
608 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
609 ; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
610 ; AVX512F-32-NEXT: kmovd %k0, %eax
611 ; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
612 ; AVX512F-32-NEXT: kmovd %k0, %ecx
613 ; AVX512F-32-NEXT: addl %eax, %ecx
614 ; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
615 ; AVX512F-32-NEXT: kmovd %k0, %eax
616 ; AVX512F-32-NEXT: addl %ecx, %eax
617 ; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
618 ; AVX512F-32-NEXT: kmovd %k0, %ecx
619 ; AVX512F-32-NEXT: addl %eax, %ecx
620 ; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
621 ; AVX512F-32-NEXT: kmovd %k0, %eax
622 ; AVX512F-32-NEXT: addl %ecx, %eax
623 ; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
624 ; AVX512F-32-NEXT: kmovd %k0, %ecx
625 ; AVX512F-32-NEXT: addl %eax, %ecx
626 ; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
627 ; AVX512F-32-NEXT: kmovd %k0, %edx
628 ; AVX512F-32-NEXT: addl %ecx, %edx
629 ; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
630 ; AVX512F-32-NEXT: kmovd %k0, %eax
631 ; AVX512F-32-NEXT: addl %edx, %eax
632 ; AVX512F-32-NEXT: retl
633 …%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
634 …%res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
636 …%res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
638 …%res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
640 …%res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
642 …%res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
644 …%res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
646 …%res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
651 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
653 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
659 ; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
660 ; AVX512F-32: # BB#0:
661 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
662 ; AVX512F-32-NEXT: retl
663 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
664 ret <32 x i16> %res
667 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru…
675 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
676 ; AVX512F-32: # BB#0:
677 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
678 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
679 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
680 ; AVX512F-32-NEXT: retl
681 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
682 ret <32 x i16> %res
685 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
692 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512:
693 ; AVX512F-32: # BB#0:
694 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
695 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
696 ; AVX512F-32-NEXT: retl
697 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
698 ret <32 x i16> %res
701 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
707 ; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512:
708 ; AVX512F-32: # BB#0:
709 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
710 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0
711 ; AVX512F-32-NEXT: retl
713 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
714 ret <32 x i16> %res
717 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %pas…
725 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
726 ; AVX512F-32: # BB#0:
727 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
728 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
729 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1}
730 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
731 ; AVX512F-32-NEXT: retl
733 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
734 ret <32 x i16> %res
737 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
744 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512:
745 ; AVX512F-32: # BB#0:
746 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
747 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
748 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
749 ; AVX512F-32-NEXT: retl
751 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
752 ret <32 x i16> %res
755 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
761 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512:
762 ; AVX512F-32: # BB#0:
763 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
764 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0
765 ; AVX512F-32-NEXT: retl
769 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
770 ret <32 x i16> %res
773 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru,…
781 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
782 ; AVX512F-32: # BB#0:
783 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
784 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
785 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
786 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
787 ; AVX512F-32-NEXT: retl
791 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
792 ret <32 x i16> %res
795 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
802 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512:
803 ; AVX512F-32: # BB#0:
804 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
805 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
806 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
807 ; AVX512F-32-NEXT: retl
811 …%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
812 ret <32 x i16> %res
815 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
817 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
823 ; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512:
824 ; AVX512F-32: # BB#0:
825 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
826 ; AVX512F-32-NEXT: retl
827 …%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
831 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, …
839 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
840 ; AVX512F-32: # BB#0:
841 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
842 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
843 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
844 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
845 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
846 ; AVX512F-32-NEXT: retl
847 …%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %…
851 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
858 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
859 ; AVX512F-32: # BB#0:
860 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
861 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
862 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
863 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
864 ; AVX512F-32-NEXT: retl
865 …%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
869 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
875 ; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512:
876 ; AVX512F-32: # BB#0:
877 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
878 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0
879 ; AVX512F-32-NEXT: retl
880 %b = load <32 x i16>, <32 x i16>* %ptr_b
881 …%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
885 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passT…
893 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
894 ; AVX512F-32: # BB#0:
895 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
896 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
897 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
898 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
899 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
900 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
901 ; AVX512F-32-NEXT: retl
902 %b = load <32 x i16>, <32 x i16>* %ptr_b
903 …%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %…
907 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
914 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
915 ; AVX512F-32: # BB#0:
916 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
917 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
918 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
919 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
920 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
921 ; AVX512F-32-NEXT: retl
922 %b = load <32 x i16>, <32 x i16>* %ptr_b
923 …%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
927 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
930 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
936 ; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512:
937 ; AVX512F-32: # BB#0:
938 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
939 ; AVX512F-32-NEXT: retl
940 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
941 ret <32 x i16> %res
944 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThr…
952 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
953 ; AVX512F-32: # BB#0:
954 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
955 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
956 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
957 ; AVX512F-32-NEXT: retl
958 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
959 ret <32 x i16> %res
962 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
969 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512:
970 ; AVX512F-32: # BB#0:
971 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
972 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
973 ; AVX512F-32-NEXT: retl
974 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
975 ret <32 x i16> %res
978 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
984 ; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512:
985 ; AVX512F-32: # BB#0:
986 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
987 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0
988 ; AVX512F-32-NEXT: retl
990 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
991 ret <32 x i16> %res
994 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %pa…
1002 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
1003 ; AVX512F-32: # BB#0:
1004 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1005 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1006 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1}
1007 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1008 ; AVX512F-32-NEXT: retl
1010 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
1011 ret <32 x i16> %res
1014 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1021 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512:
1022 ; AVX512F-32: # BB#0:
1023 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1024 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1025 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
1026 ; AVX512F-32-NEXT: retl
1028 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
1029 ret <32 x i16> %res
1032 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1038 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512:
1039 ; AVX512F-32: # BB#0:
1040 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1041 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0
1042 ; AVX512F-32-NEXT: retl
1046 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
1047 ret <32 x i16> %res
1050 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru…
1058 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
1059 ; AVX512F-32: # BB#0:
1060 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1061 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1062 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
1063 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1064 ; AVX512F-32-NEXT: retl
1068 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
1069 ret <32 x i16> %res
1072 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1079 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512:
1080 ; AVX512F-32: # BB#0:
1081 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1082 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1083 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1084 ; AVX512F-32-NEXT: retl
1088 …%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16>…
1089 ret <32 x i16> %res
1092 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1094 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1100 ; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512:
1101 ; AVX512F-32: # BB#0:
1102 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
1103 ; AVX512F-32-NEXT: retl
1104 …%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
1108 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru,…
1116 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
1117 ; AVX512F-32: # BB#0:
1118 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1119 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1120 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1121 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1122 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1123 ; AVX512F-32-NEXT: retl
1124 …%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %…
1128 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1135 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
1136 ; AVX512F-32: # BB#0:
1137 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1138 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1139 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1140 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1141 ; AVX512F-32-NEXT: retl
1142 …%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
1146 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1152 ; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512:
1153 ; AVX512F-32: # BB#0:
1154 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1155 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0
1156 ; AVX512F-32-NEXT: retl
1157 %b = load <32 x i16>, <32 x i16>* %ptr_b
1158 …%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
1162 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %pass…
1170 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
1171 ; AVX512F-32: # BB#0:
1172 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1173 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1174 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1175 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1176 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
1177 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1178 ; AVX512F-32-NEXT: retl
1179 %b = load <32 x i16>, <32 x i16>* %ptr_b
1180 …%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %…
1184 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1191 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
1192 ; AVX512F-32: # BB#0:
1193 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1194 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1195 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1196 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1197 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
1198 ; AVX512F-32-NEXT: retl
1199 %b = load <32 x i16>, <32 x i16>* %ptr_b
1200 …%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> z…
1204 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1206 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1212 ; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
1213 ; AVX512F-32: # BB#0:
1214 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
1215 ; AVX512F-32-NEXT: retl
1216 …%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1217 ret <32 x i16> %res
1220 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru,…
1228 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
1229 ; AVX512F-32: # BB#0:
1230 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1231 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1232 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1233 ; AVX512F-32-NEXT: retl
1234 …%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1235 ret <32 x i16> %res
1238 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1245 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
1246 ; AVX512F-32: # BB#0:
1247 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1248 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1249 ; AVX512F-32-NEXT: retl
1250 …%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1251 ret <32 x i16> %res
1254 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1260 ; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
1261 ; AVX512F-32: # BB#0:
1262 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1263 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
1264 ; AVX512F-32-NEXT: retl
1265 %b = load <32 x i16>, <32 x i16>* %ptr_b
1266 …%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1267 ret <32 x i16> %res
1270 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %pass…
1278 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
1279 ; AVX512F-32: # BB#0:
1280 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1281 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1282 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
1283 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1284 ; AVX512F-32-NEXT: retl
1285 %b = load <32 x i16>, <32 x i16>* %ptr_b
1286 …%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1287 ret <32 x i16> %res
1290 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1297 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
1298 ; AVX512F-32: # BB#0:
1299 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1300 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1301 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
1302 ; AVX512F-32-NEXT: retl
1303 %b = load <32 x i16>, <32 x i16>* %ptr_b
1304 …%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1305 ret <32 x i16> %res
1308 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1310 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1316 ; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
1317 ; AVX512F-32: # BB#0:
1318 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
1319 ; AVX512F-32-NEXT: retl
1320 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1321 ret <32 x i16> %res
1324 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru,…
1332 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
1333 ; AVX512F-32: # BB#0:
1334 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1335 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1336 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1337 ; AVX512F-32-NEXT: retl
1338 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1339 ret <32 x i16> %res
1342 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1349 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
1350 ; AVX512F-32: # BB#0:
1351 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1352 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1353 ; AVX512F-32-NEXT: retl
1354 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1355 ret <32 x i16> %res
1358 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1364 ; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
1365 ; AVX512F-32: # BB#0:
1366 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1367 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
1368 ; AVX512F-32-NEXT: retl
1369 %b = load <32 x i16>, <32 x i16>* %ptr_b
1370 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1371 ret <32 x i16> %res
1374 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %pass…
1382 ; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
1383 ; AVX512F-32: # BB#0:
1384 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1385 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1386 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
1387 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1388 ; AVX512F-32-NEXT: retl
1389 %b = load <32 x i16>, <32 x i16>* %ptr_b
1390 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1391 ret <32 x i16> %res
1394 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1401 ; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
1402 ; AVX512F-32: # BB#0:
1403 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1404 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1405 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
1406 ; AVX512F-32-NEXT: retl
1407 %b = load <32 x i16>, <32 x i16>* %ptr_b
1408 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> …
1409 ret <32 x i16> %res
1412 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1414 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1420 ; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
1421 ; AVX512F-32: # BB#0:
1422 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
1423 ; AVX512F-32-NEXT: retl
1424 …%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1425 ret <32 x i16> %res
1428 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru,…
1436 ; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
1437 ; AVX512F-32: # BB#0:
1438 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1439 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1440 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1441 ; AVX512F-32-NEXT: retl
1442 …%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1443 ret <32 x i16> %res
1446 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1453 ; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
1454 ; AVX512F-32: # BB#0:
1455 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1456 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1457 ; AVX512F-32-NEXT: retl
1458 …%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1459 ret <32 x i16> %res
1462 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1468 ; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
1469 ; AVX512F-32: # BB#0:
1470 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1471 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
1472 ; AVX512F-32-NEXT: retl
1473 %b = load <32 x i16>, <32 x i16>* %ptr_b
1474 …%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1475 ret <32 x i16> %res
1478 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %pass…
1486 ; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
1487 ; AVX512F-32: # BB#0:
1488 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1489 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1490 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
1491 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1492 ; AVX512F-32-NEXT: retl
1493 %b = load <32 x i16>, <32 x i16>* %ptr_b
1494 …%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1495 ret <32 x i16> %res
1498 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1505 ; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
1506 ; AVX512F-32: # BB#0:
1507 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1508 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1509 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
1510 ; AVX512F-32-NEXT: retl
1511 %b = load <32 x i16>, <32 x i16>* %ptr_b
1512 …%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1513 ret <32 x i16> %res
1516 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1518 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1524 ; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
1525 ; AVX512F-32: # BB#0:
1526 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
1527 ; AVX512F-32-NEXT: retl
1528 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1529 ret <32 x i16> %res
1532 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru,…
1540 ; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
1541 ; AVX512F-32: # BB#0:
1542 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1543 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1544 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1545 ; AVX512F-32-NEXT: retl
1546 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1547 ret <32 x i16> %res
1550 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1557 ; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
1558 ; AVX512F-32: # BB#0:
1559 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1560 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1561 ; AVX512F-32-NEXT: retl
1562 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1563 ret <32 x i16> %res
1566 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1572 ; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
1573 ; AVX512F-32: # BB#0:
1574 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1575 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
1576 ; AVX512F-32-NEXT: retl
1577 %b = load <32 x i16>, <32 x i16>* %ptr_b
1578 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1579 ret <32 x i16> %res
1582 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %pass…
1590 ; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
1591 ; AVX512F-32: # BB#0:
1592 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1593 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1594 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
1595 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1596 ; AVX512F-32-NEXT: retl
1597 %b = load <32 x i16>, <32 x i16>* %ptr_b
1598 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1599 ret <32 x i16> %res
1602 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1609 ; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
1610 ; AVX512F-32: # BB#0:
1611 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1612 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1613 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
1614 ; AVX512F-32-NEXT: retl
1615 %b = load <32 x i16>, <32 x i16>* %ptr_b
1616 …%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16>…
1617 ret <32 x i16> %res
1620 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1633 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1634 ; AVX512F-32: # BB#0:
1635 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1636 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1637 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1638 ; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1639 ; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
1640 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1641 ; AVX512F-32-NEXT: retl
1648 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1650 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %…
1659 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1660 ; AVX512F-32: # BB#0:
1661 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1662 ; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1663 ; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
1664 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1665 ; AVX512F-32-NEXT: retl
1666 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
1667 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i1…
1668 %res2 = add <32 x i16> %res, %res1
1669 ret <32 x i16> %res2
1683 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1684 ; AVX512F-32: # BB#0:
1685 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1686 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1687 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1688 ; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1689 ; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
1690 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1691 ; AVX512F-32-NEXT: retl
1698 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1700 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %…
1709 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1710 ; AVX512F-32: # BB#0:
1711 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1712 ; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1713 ; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
1714 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1715 ; AVX512F-32-NEXT: retl
1716 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
1717 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i1…
1718 %res2 = add <32 x i16> %res, %res1
1719 ret <32 x i16> %res2
1733 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1734 ; AVX512F-32: # BB#0:
1735 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1736 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1737 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1738 ; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1739 ; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0
1740 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1741 ; AVX512F-32-NEXT: retl
1748 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1750 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %…
1759 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1760 ; AVX512F-32: # BB#0:
1761 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1762 ; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1763 ; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0
1764 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1765 ; AVX512F-32-NEXT: retl
1766 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
1767 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i1…
1768 %res2 = add <32 x i16> %res, %res1
1769 ret <32 x i16> %res2
1783 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1784 ; AVX512F-32: # BB#0:
1785 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1786 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1787 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1788 ; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
1789 ; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0
1790 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1791 ; AVX512F-32-NEXT: retl
1798 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1800 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %…
1809 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512:
1810 ; AVX512F-32: # BB#0:
1811 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1812 ; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
1813 ; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0
1814 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1815 ; AVX512F-32-NEXT: retl
1816 …%res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
1817 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i1…
1818 %res2 = add <32 x i16> %res, %res1
1819 ret <32 x i16> %res2
1822 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1824 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x …
1834 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
1835 ; AVX512F-32: # BB#0:
1836 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1837 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
1838 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1839 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1840 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1841 ; AVX512F-32-NEXT: retl
1842 …%res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32
1843 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <3…
1844 %res2 = add <32 x i16> %res, %res1
1845 ret <32 x i16> %res2
1848 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1850 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x…
1860 ; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
1861 ; AVX512F-32: # BB#0:
1862 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1863 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
1864 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1865 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1866 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1867 ; AVX512F-32-NEXT: retl
1868 …%res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <3…
1869 …%res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <
1870 %res2 = add <32 x i16> %res, %res1
1871 ret <32 x i16> %res2
1874 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1876 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x …
1886 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
1887 ; AVX512F-32: # BB#0:
1888 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1889 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
1890 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1891 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1892 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1893 ; AVX512F-32-NEXT: retl
1894 …%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32
1895 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <3…
1896 %res2 = add <32 x i16> %res, %res1
1897 ret <32 x i16> %res2
1911 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512:
1912 ; AVX512F-32: # BB#0:
1913 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1914 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1915 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1916 ; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
1917 ; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0
1918 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1919 ; AVX512F-32-NEXT: retl
1926 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1928 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x…
1937 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
1938 ; AVX512F-32: # BB#0:
1939 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1940 ; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
1941 ; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0
1942 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1943 ; AVX512F-32-NEXT: retl
1944 …%res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
1945 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
1946 %res2 = add <32 x i16> %res, %res1
1947 ret <32 x i16> %res2
1961 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1962 ; AVX512F-32: # BB#0:
1963 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1964 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1965 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1966 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1967 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
1968 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1969 ; AVX512F-32-NEXT: retl
1976 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
1978 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
1987 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
1988 ; AVX512F-32: # BB#0:
1989 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1990 ; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1}
1991 ; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0
1992 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
1993 ; AVX512F-32-NEXT: retl
1994 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
1995 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
1996 %res2 = add <32 x i16> %res, %res1
1997 ret <32 x i16> %res2
2011 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2012 ; AVX512F-32: # BB#0:
2013 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2014 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2015 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2016 ; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1}
2017 ; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0
2018 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
2019 ; AVX512F-32-NEXT: retl
2026 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2028 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> …
2037 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2038 ; AVX512F-32: # BB#0:
2039 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2040 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2041 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
2042 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2043 ; AVX512F-32-NEXT: retl
2044 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i1…
2045 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i…
2046 %res2 = add <32 x i16> %res, %res1
2047 ret <32 x i16> %res2
2050 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2052 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %…
2061 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2062 ; AVX512F-32: # BB#0:
2063 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2064 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2065 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
2066 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2067 ; AVX512F-32-NEXT: retl
2068 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
2069 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i1…
2070 %res2 = add <32 x i16> %res, %res1
2071 ret <32 x i16> %res2
2074 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2076 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
2085 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2086 ; AVX512F-32: # BB#0:
2087 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2088 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2089 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
2090 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2091 ; AVX512F-32-NEXT: retl
2092 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x …
2093 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x…
2094 %res2 = add <32 x i16> %res, %res1
2095 ret <32 x i16> %res2
2098 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
2100 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2111 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2112 ; AVX512F-32: # BB#0:
2113 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2114 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
2115 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
2116 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0
2117 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2118 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2119 ; AVX512F-32-NEXT: retl
2120 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2121 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2122 …%res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer…
2123 %res3 = add <32 x i8> %res0, %res1
2124 %res4 = add <32 x i8> %res3, %res2
2125 ret <32 x i8> %res4
2128 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2130 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2138 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2139 ; AVX512F-32: # BB#0:
2140 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2141 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2142 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax)
2143 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1}
2144 ; AVX512F-32-NEXT: retl
2145 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2146 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2150 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
2152 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2163 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2164 ; AVX512F-32: # BB#0:
2165 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2166 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
2167 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
2168 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0
2169 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2170 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2171 ; AVX512F-32-NEXT: retl
2172 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2173 … %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2174 …%res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitialize…
2175 %res3 = add <32 x i8> %res0, %res1
2176 %res4 = add <32 x i8> %res3, %res2
2177 ret <32 x i8> %res4
2180 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2182 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2190 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2191 ; AVX512F-32: # BB#0:
2192 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2193 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2194 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx)
2195 ; AVX512F-32-NEXT: kmovd %eax, %k1
2196 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1}
2197 ; AVX512F-32-NEXT: retl
2198 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2199 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2203 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
2205 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2216 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2217 ; AVX512F-32: # BB#0:
2218 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2219 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
2220 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
2221 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0
2222 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2223 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2224 ; AVX512F-32-NEXT: retl
2225 … %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2226 … %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2227 …%res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializ…
2228 %res3 = add <32 x i8> %res0, %res1
2229 %res4 = add <32 x i8> %res3, %res2
2230 ret <32 x i8> %res4
2233 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2235 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2243 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2244 ; AVX512F-32: # BB#0:
2245 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2246 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2247 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx)
2248 ; AVX512F-32-NEXT: kmovd %eax, %k1
2249 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1}
2250 ; AVX512F-32-NEXT: retl
2251 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2252 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
2256 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
2258 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> …
2267 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2268 ; AVX512F-32: # BB#0:
2269 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2270 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2271 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
2272 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2273 ; AVX512F-32-NEXT: retl
2274 …%res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i1…
2275 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i…
2276 %res2 = add <32 x i16> %res, %res1
2277 ret <32 x i16> %res2
2280 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
2282 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> …
2291 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2292 ; AVX512F-32: # BB#0:
2293 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
2294 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2295 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
2296 ; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0
2297 ; AVX512F-32-NEXT: retl
2298 …%res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i3…
2299 …%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i…
2304 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
2306 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x…
2317 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2318 ; AVX512F-32: # BB#0:
2319 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2320 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2321 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2322 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2323 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2324 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2325 ; AVX512F-32-NEXT: retl
2326 …%res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32
2327 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <3…
2328 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <3…
2329 %res3 = add <32 x i16> %res, %res1
2330 %res4 = add <32 x i16> %res3, %res2
2331 ret <32 x i16> %res4
2344 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2345 ; AVX512F-32: # BB#0:
2346 ; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
2347 ; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
2348 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2349 ; AVX512F-32-NEXT: retl
2367 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
2368 ; AVX512F-32: # BB#0:
2369 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2370 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2371 ; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
2372 ; AVX512F-32-NEXT: kmovd %k0, %eax
2373 ; AVX512F-32-NEXT: retl
2389 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
2390 ; AVX512F-32: # BB#0:
2391 ; AVX512F-32-NEXT: subl $12, %esp
2392 ; AVX512F-32-NEXT: .Ltmp4:
2393 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
2394 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2395 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2396 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
2397 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2398 ; AVX512F-32-NEXT: movl (%esp), %eax
2399 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2400 ; AVX512F-32-NEXT: addl $12, %esp
2401 ; AVX512F-32-NEXT: retl
2415 ; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
2416 ; AVX512F-32: # BB#0:
2417 ; AVX512F-32-NEXT: subl $12, %esp
2418 ; AVX512F-32-NEXT: .Ltmp5:
2419 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
2420 ; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
2421 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2422 ; AVX512F-32-NEXT: movl (%esp), %eax
2423 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2424 ; AVX512F-32-NEXT: addl $12, %esp
2425 ; AVX512F-32-NEXT: retl
2430 declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
2432 define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
2439 ; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
2440 ; AVX512F-32: # BB#0:
2441 ; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
2442 ; AVX512F-32-NEXT: kmovd %k0, %eax
2443 ; AVX512F-32-NEXT: retl
2444 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
2457 ; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512:
2458 ; AVX512F-32: # BB#0:
2459 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2460 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2461 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
2462 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
2463 ; AVX512F-32-NEXT: retl
2468 declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
2470 define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
2477 ; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512:
2478 ; AVX512F-32: # BB#0:
2479 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2480 ; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0
2481 ; AVX512F-32-NEXT: retl
2482 %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
2483 ret <32 x i16> %res
2486 declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
2488 define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2…
2499 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
2500 ; AVX512F-32: # BB#0:
2501 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2502 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
2503 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
2504 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
2505 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2506 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
2507 ; AVX512F-32-NEXT: retl
2508 …%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> …
2509 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16>…
2510 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16>…
2511 %res3 = add <32 x i16> %res, %res1
2512 %res4 = add <32 x i16> %res3, %res2
2513 ret <32 x i16> %res4
2516 declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32)
2518 define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32…
2529 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
2530 ; AVX512F-32: # BB#0:
2531 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2532 ; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
2533 ; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
2534 ; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0
2535 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2536 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
2537 ; AVX512F-32-NEXT: retl
2538 …%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i3…
2539 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i…
2540 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroin…
2541 %res3 = add <32 x i16> %res, %res1
2542 %res4 = add <32 x i16> %res3, %res2
2543 ret <32 x i16> %res4
2546 declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2548 define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2…
2559 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
2560 ; AVX512F-32: # BB#0:
2561 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2562 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
2563 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2564 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
2565 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2566 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2567 ; AVX512F-32-NEXT: retl
2568 …%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> …
2569 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
2570 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
2571 %res3 = add <32 x i16> %res, %res1
2572 %res4 = add <32 x i16> %res3, %res2
2573 ret <32 x i16> %res4
2576 declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
2578 define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2…
2589 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
2590 ; AVX512F-32: # BB#0:
2591 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2592 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
2593 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
2594 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
2595 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2596 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2597 ; AVX512F-32-NEXT: retl
2598 …%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> …
2599 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16>…
2600 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16>…
2601 %res3 = add <32 x i16> %res, %res1
2602 %res4 = add <32 x i16> %res3, %res2
2603 ret <32 x i16> %res4
2606 declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32)
2608 define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32…
2619 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
2620 ; AVX512F-32: # BB#0:
2621 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2622 ; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
2623 ; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
2624 ; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0
2625 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2626 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2627 ; AVX512F-32-NEXT: retl
2628 …%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i3…
2629 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroin…
2630 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i…
2631 %res3 = add <32 x i16> %res, %res1
2632 %res4 = add <32 x i16> %res3, %res2
2633 ret <32 x i16> %res4
2636 declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2638 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x…
2649 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
2650 ; AVX512F-32: # BB#0:
2651 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2652 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
2653 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
2654 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
2655 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2656 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2657 ; AVX512F-32-NEXT: retl
2658 …%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
2659 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
2660 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
2661 %res3 = add <32 x i16> %res, %res1
2662 %res4 = add <32 x i16> %res3, %res2
2663 ret <32 x i16> %res4
2666 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i…
2673 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
2674 ; AVX512F-32: # BB#0:
2675 ; AVX512F-32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,…
2676 ; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
2677 ; AVX512F-32-NEXT: retl
2678 …%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23…
2679 …<32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 3…
2680 <32 x i16> zeroinitializer, i32 -1)
2681 ret <32 x i16> %res
2684 declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
2686 define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2…
2697 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
2698 ; AVX512F-32: # BB#0:
2699 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2700 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
2701 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
2702 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
2703 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2704 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2705 ; AVX512F-32-NEXT: retl
2706 …%res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> …
2707 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16>…
2708 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16>…
2709 %res3 = add <32 x i16> %res, %res1
2710 %res4 = add <32 x i16> %res3, %res2
2711 ret <32 x i16> %res4
2714 declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32)
2716 define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32…
2727 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
2728 ; AVX512F-32: # BB#0:
2729 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2730 ; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
2731 ; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
2732 ; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0
2733 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2734 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2735 ; AVX512F-32-NEXT: retl
2736 …%res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i3…
2737 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroin…
2738 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i…
2739 %res3 = add <32 x i16> %res, %res1
2740 %res4 = add <32 x i16> %res3, %res2
2741 ret <32 x i16> %res4
2744 declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2746 define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2…
2757 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
2758 ; AVX512F-32: # BB#0:
2759 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2760 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
2761 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2762 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
2763 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2764 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2765 ; AVX512F-32-NEXT: retl
2766 …%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> …
2767 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
2768 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>…
2769 %res3 = add <32 x i16> %res, %res1
2770 %res4 = add <32 x i16> %res3, %res2
2771 ret <32 x i16> %res4
2774 declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
2776 define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
2787 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
2788 ; AVX512F-32: # BB#0:
2789 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2790 ; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3…
2791 ; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ym…
2792 ; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero…
2793 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2794 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2795 ; AVX512F-32-NEXT: retl
2796 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
2797 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitiali…
2798 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
2799 %res3 = add <32 x i16> %res, %res1
2800 %res4 = add <32 x i16> %res3, %res2
2801 ret <32 x i16> %res4
2804 declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
2806 define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
2817 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
2818 ; AVX512F-32: # BB#0:
2819 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2820 ; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
2821 ; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
2822 ; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0
2823 ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
2824 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2825 ; AVX512F-32-NEXT: retl
2826 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
2827 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitiali…
2828 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
2829 %res3 = add <32 x i16> %res, %res1
2830 %res4 = add <32 x i16> %res3, %res2
2831 ret <32 x i16> %res4
2834 declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2836 define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16…
2847 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2848 ; AVX512F-32: # BB#0:
2849 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2850 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
2851 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
2852 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0
2853 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2854 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2855 ; AVX512F-32-NEXT: retl
2856 …%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x …
2857 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x…
2858 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x…
2859 %res3 = add <32 x i16> %res, %res1
2860 %res4 = add <32 x i16> %res3, %res2
2861 ret <32 x i16> %res4
2877 ; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
2878 ; AVX512F-32: # BB#0:
2879 ; AVX512F-32-NEXT: subl $20, %esp
2880 ; AVX512F-32-NEXT: .Ltmp6:
2881 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
2882 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2883 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2884 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2885 ; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
2886 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2887 ; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0
2888 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
2889 ; AVX512F-32-NEXT: movl (%esp), %eax
2890 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2891 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
2892 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
2893 ; AVX512F-32-NEXT: addl $20, %esp
2894 ; AVX512F-32-NEXT: retl
2901 declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
2903 define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2914 ; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512:
2915 ; AVX512F-32: # BB#0:
2916 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2917 ; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1}
2918 ; AVX512F-32-NEXT: kmovd %k0, %ecx
2919 ; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0
2920 ; AVX512F-32-NEXT: kmovd %k0, %eax
2921 ; AVX512F-32-NEXT: addl %ecx, %eax
2922 ; AVX512F-32-NEXT: retl
2923 %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2924 %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
2942 ; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
2943 ; AVX512F-32: # BB#0:
2944 ; AVX512F-32-NEXT: subl $20, %esp
2945 ; AVX512F-32-NEXT: .Ltmp7:
2946 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
2947 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2948 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2949 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2950 ; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
2951 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2952 ; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0
2953 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
2954 ; AVX512F-32-NEXT: movl (%esp), %eax
2955 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2956 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
2957 ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
2958 ; AVX512F-32-NEXT: addl $20, %esp
2959 ; AVX512F-32-NEXT: retl
2966 declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2)
2968 define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2979 ; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512:
2980 ; AVX512F-32: # BB#0:
2981 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2982 ; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1}
2983 ; AVX512F-32-NEXT: kmovd %k0, %ecx
2984 ; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0
2985 ; AVX512F-32-NEXT: kmovd %k0, %eax
2986 ; AVX512F-32-NEXT: addl %ecx, %eax
2987 ; AVX512F-32-NEXT: retl
2988 %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2989 %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
3007 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
3008 ; AVX512F-32: # BB#0:
3009 ; AVX512F-32-NEXT: movb {{[0-9]+}}(%esp), %al
3010 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
3011 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
3012 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
3013 ; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
3014 ; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
3015 ; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
3016 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
3017 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
3018 ; AVX512F-32-NEXT: retl
3027 declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
3029 define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask)…
3040 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
3041 ; AVX512F-32: # BB#0:
3042 ; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
3043 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
3044 ; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
3045 ; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
3046 ; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
3047 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
3048 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
3049 ; AVX512F-32-NEXT: retl
3050 %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
3051 …%res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %m…
3052 …%res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitial…
3053 %res3 = add <32 x i16> %res, %res1
3054 %res4 = add <32 x i16> %res2, %res3
3055 ret <32 x i16> %res4