Lines Matching refs:AVX2

4 … %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
5 …nown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
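The two RUN lines above drive the same IR through llc with plain +avx2 and with +avx2,+fast-variable-shuffle, which is what produces the AVX2-SLOW and AVX2-FAST check prefixes below (the shared AVX2 prefix covers lines common to both). The IR function bodies are not part of this excerpt; as a hypothetical reconstruction from the function name and the emitted code, the first function checked below, trunc_add_v4i64_v4i32, presumably looks something like:

define <4 x i32> @trunc_add_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) {
  ; add in the wide 64-bit lanes, then truncate each lane to i32
  %1 = add <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

The SLOW lowering narrows the result with vextracti128 + vshufps, while the FAST lowering uses a single vpermd through a <0,2,4,6,u,u,u,u> index vector.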
32 ; AVX2-SLOW-LABEL: trunc_add_v4i64_v4i32:
33 ; AVX2-SLOW: # %bb.0:
34 ; AVX2-SLOW-NEXT: vpaddq %ymm1, %ymm0, %ymm0
35 ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1
36 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
37 ; AVX2-SLOW-NEXT: vzeroupper
38 ; AVX2-SLOW-NEXT: retq
40 ; AVX2-FAST-LABEL: trunc_add_v4i64_v4i32:
41 ; AVX2-FAST: # %bb.0:
42 ; AVX2-FAST-NEXT: vpaddq %ymm1, %ymm0, %ymm0
43 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
44 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
45 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
46 ; AVX2-FAST-NEXT: vzeroupper
47 ; AVX2-FAST-NEXT: retq
102 ; AVX2-SLOW-LABEL: trunc_add_v8i64_v8i16:
103 ; AVX2-SLOW: # %bb.0:
104 ; AVX2-SLOW-NEXT: vpaddq %ymm3, %ymm1, %ymm1
105 ; AVX2-SLOW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
106 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
107 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
108 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
109 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
110 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
111 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
112 ; AVX2-SLOW-NEXT: vzeroupper
113 ; AVX2-SLOW-NEXT: retq
115 ; AVX2-FAST-LABEL: trunc_add_v8i64_v8i16:
116 ; AVX2-FAST: # %bb.0:
117 ; AVX2-FAST-NEXT: vpaddq %ymm3, %ymm1, %ymm1
118 ; AVX2-FAST-NEXT: vpaddq %ymm2, %ymm0, %ymm0
119 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
120 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
121 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
122 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
123 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
124 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
125 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
126 ; AVX2-FAST-NEXT: vzeroupper
127 ; AVX2-FAST-NEXT: retq
165 ; AVX2-LABEL: trunc_add_v8i32_v8i16:
166 ; AVX2: # %bb.0:
167 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
168 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
169 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
170 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
171 ; AVX2-NEXT: vzeroupper
172 ; AVX2-NEXT: retq
252 ; AVX2-SLOW-LABEL: trunc_add_v16i64_v16i8:
253 ; AVX2-SLOW: # %bb.0:
254 ; AVX2-SLOW-NEXT: vpaddq %ymm5, %ymm1, %ymm1
255 ; AVX2-SLOW-NEXT: vpaddq %ymm4, %ymm0, %ymm0
256 ; AVX2-SLOW-NEXT: vpaddq %ymm7, %ymm3, %ymm3
257 ; AVX2-SLOW-NEXT: vpaddq %ymm6, %ymm2, %ymm2
258 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
259 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
260 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
261 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
262 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
263 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
264 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
265 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
266 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
267 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
268 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
269 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
270 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
271 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
272 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
273 ; AVX2-SLOW-NEXT: vzeroupper
274 ; AVX2-SLOW-NEXT: retq
276 ; AVX2-FAST-LABEL: trunc_add_v16i64_v16i8:
277 ; AVX2-FAST: # %bb.0:
278 ; AVX2-FAST-NEXT: vpaddq %ymm5, %ymm1, %ymm1
279 ; AVX2-FAST-NEXT: vpaddq %ymm4, %ymm0, %ymm0
280 ; AVX2-FAST-NEXT: vpaddq %ymm7, %ymm3, %ymm3
281 ; AVX2-FAST-NEXT: vpaddq %ymm6, %ymm2, %ymm2
282 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
283 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
284 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
285 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
286 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
287 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
288 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
289 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
290 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
291 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
292 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
293 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
294 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
295 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
296 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
297 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
298 ; AVX2-FAST-NEXT: vzeroupper
299 ; AVX2-FAST-NEXT: retq
353 ; AVX2-LABEL: trunc_add_v16i32_v16i8:
354 ; AVX2: # %bb.0:
355 ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
356 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
357 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
358 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
359 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
360 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
361 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
362 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
363 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
364 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
365 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
366 ; AVX2-NEXT: vzeroupper
367 ; AVX2-NEXT: retq
404 ; AVX2-LABEL: trunc_add_v16i16_v16i8:
405 ; AVX2: # %bb.0:
406 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
407 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
408 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
409 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
410 ; AVX2-NEXT: vzeroupper
411 ; AVX2-NEXT: retq
466 ; AVX2-LABEL: trunc_add_v8i32_v8i16_sext_8i8:
467 ; AVX2: # %bb.0:
468 ; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
469 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
470 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
471 ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
472 ; AVX2-NEXT: vzeroupper
473 ; AVX2-NEXT: retq
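The sign-extend variant checked just above (trunc_add_v8i32_v8i16_sext_8i8) presumably corresponds to IR along these lines; this is a sketch reconstructed from the function name and the emitted code, not the actual test body:

define <8 x i16> @trunc_add_v8i32_v8i16_sext_8i8(<16 x i8> %a0, <8 x i32> %a1) {
  ; take the low 8 bytes, sign-extend, add as i32, then truncate to i16
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = sext <8 x i8> %1 to <8 x i32>
  %3 = add <8 x i32> %2, %a1
  %4 = trunc <8 x i32> %3 to <8 x i16>
  ret <8 x i16> %4
}

The checks show the add being narrowed to 16 bits: vpmovsxbw extends the bytes only to i16, the i32 operand is truncated first via the shuffle, and a single vpaddw does the arithmetic.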
509 ; AVX2-SLOW-LABEL: trunc_add_const_v4i64_v4i32:
510 ; AVX2-SLOW: # %bb.0:
511 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
512 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
513 ; AVX2-SLOW-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
514 ; AVX2-SLOW-NEXT: vzeroupper
515 ; AVX2-SLOW-NEXT: retq
517 ; AVX2-FAST-LABEL: trunc_add_const_v4i64_v4i32:
518 ; AVX2-FAST: # %bb.0:
519 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
520 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
521 ; AVX2-FAST-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
522 ; AVX2-FAST-NEXT: vzeroupper
523 ; AVX2-FAST-NEXT: retq
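For the constant-operand tests such as trunc_add_const_v4i64_v4i32 (checked just above), the IR presumably adds a constant <4 x i64> vector before truncating. The actual constant is not recoverable from this excerpt, so the value in the sketch below is purely illustrative:

define <4 x i32> @trunc_add_const_v4i64_v4i32(<4 x i64> %a0) {
  ; constant chosen for illustration only
  %1 = add <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

Note the order in the generated code: unlike the two-operand cases, the truncation happens first and the constant is applied afterwards as a v4i32 vpaddd with a constant-pool ({{.*}}(%rip)) operand, so only one 128-bit add is needed.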
568 ; AVX2-SLOW-LABEL: trunc_add_const_v8i64_v8i16:
569 ; AVX2-SLOW: # %bb.0:
570 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
571 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
572 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
573 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
574 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
575 ; AVX2-SLOW-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
576 ; AVX2-SLOW-NEXT: vzeroupper
577 ; AVX2-SLOW-NEXT: retq
579 ; AVX2-FAST-LABEL: trunc_add_const_v8i64_v8i16:
580 ; AVX2-FAST: # %bb.0:
581 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
582 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
583 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
584 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
585 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
586 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
587 ; AVX2-FAST-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
588 ; AVX2-FAST-NEXT: vzeroupper
589 ; AVX2-FAST-NEXT: retq
624 ; AVX2-LABEL: trunc_add_const_v8i32_v8i16:
625 ; AVX2: # %bb.0:
626 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
627 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
628 ; AVX2-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
629 ; AVX2-NEXT: vzeroupper
630 ; AVX2-NEXT: retq
688 ; AVX2-SLOW-LABEL: trunc_add_const_v16i64_v16i8:
689 ; AVX2-SLOW: # %bb.0:
690 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
691 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
692 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
693 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
694 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
695 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
696 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
697 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
698 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
699 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
700 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
701 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
702 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
703 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
704 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
705 ; AVX2-SLOW-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
706 ; AVX2-SLOW-NEXT: vzeroupper
707 ; AVX2-SLOW-NEXT: retq
709 ; AVX2-FAST-LABEL: trunc_add_const_v16i64_v16i8:
710 ; AVX2-FAST: # %bb.0:
711 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
712 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
713 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
714 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
715 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
716 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
717 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
718 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
719 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
720 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
721 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
722 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
723 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
724 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
725 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
726 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
727 ; AVX2-FAST-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
728 ; AVX2-FAST-NEXT: vzeroupper
729 ; AVX2-FAST-NEXT: retq
772 ; AVX2-LABEL: trunc_add_const_v16i32_v16i8:
773 ; AVX2: # %bb.0:
774 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
775 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
776 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
777 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
778 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
779 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
780 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
781 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
782 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
783 ; AVX2-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
784 ; AVX2-NEXT: vzeroupper
785 ; AVX2-NEXT: retq
817 ; AVX2-LABEL: trunc_add_const_v16i16_v16i8:
818 ; AVX2: # %bb.0:
819 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
820 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
821 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
822 ; AVX2-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
823 ; AVX2-NEXT: vzeroupper
824 ; AVX2-NEXT: retq
876 ; AVX2-SLOW-LABEL: trunc_sub_v4i64_v4i32:
877 ; AVX2-SLOW: # %bb.0:
878 ; AVX2-SLOW-NEXT: vpsubq %ymm1, %ymm0, %ymm0
879 ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1
880 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
881 ; AVX2-SLOW-NEXT: vzeroupper
882 ; AVX2-SLOW-NEXT: retq
884 ; AVX2-FAST-LABEL: trunc_sub_v4i64_v4i32:
885 ; AVX2-FAST: # %bb.0:
886 ; AVX2-FAST-NEXT: vpsubq %ymm1, %ymm0, %ymm0
887 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
888 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
889 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
890 ; AVX2-FAST-NEXT: vzeroupper
891 ; AVX2-FAST-NEXT: retq
946 ; AVX2-SLOW-LABEL: trunc_sub_v8i64_v8i16:
947 ; AVX2-SLOW: # %bb.0:
948 ; AVX2-SLOW-NEXT: vpsubq %ymm3, %ymm1, %ymm1
949 ; AVX2-SLOW-NEXT: vpsubq %ymm2, %ymm0, %ymm0
950 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
951 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
952 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
953 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
954 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
955 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
956 ; AVX2-SLOW-NEXT: vzeroupper
957 ; AVX2-SLOW-NEXT: retq
959 ; AVX2-FAST-LABEL: trunc_sub_v8i64_v8i16:
960 ; AVX2-FAST: # %bb.0:
961 ; AVX2-FAST-NEXT: vpsubq %ymm3, %ymm1, %ymm1
962 ; AVX2-FAST-NEXT: vpsubq %ymm2, %ymm0, %ymm0
963 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
964 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
965 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
966 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
967 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
968 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
969 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
970 ; AVX2-FAST-NEXT: vzeroupper
971 ; AVX2-FAST-NEXT: retq
1009 ; AVX2-LABEL: trunc_sub_v8i32_v8i16:
1010 ; AVX2: # %bb.0:
1011 ; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
1012 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
1013 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1014 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1015 ; AVX2-NEXT: vzeroupper
1016 ; AVX2-NEXT: retq
1096 ; AVX2-SLOW-LABEL: trunc_sub_v16i64_v16i8:
1097 ; AVX2-SLOW: # %bb.0:
1098 ; AVX2-SLOW-NEXT: vpsubq %ymm5, %ymm1, %ymm1
1099 ; AVX2-SLOW-NEXT: vpsubq %ymm4, %ymm0, %ymm0
1100 ; AVX2-SLOW-NEXT: vpsubq %ymm7, %ymm3, %ymm3
1101 ; AVX2-SLOW-NEXT: vpsubq %ymm6, %ymm2, %ymm2
1102 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
1103 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1104 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
1105 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
1106 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1107 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1108 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
1109 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
1110 ; AVX2-SLOW-NEXT: vperm2i128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
1111 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1112 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
1113 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1114 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1115 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
1116 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1117 ; AVX2-SLOW-NEXT: vzeroupper
1118 ; AVX2-SLOW-NEXT: retq
1120 ; AVX2-FAST-LABEL: trunc_sub_v16i64_v16i8:
1121 ; AVX2-FAST: # %bb.0:
1122 ; AVX2-FAST-NEXT: vpsubq %ymm5, %ymm1, %ymm1
1123 ; AVX2-FAST-NEXT: vpsubq %ymm4, %ymm0, %ymm0
1124 ; AVX2-FAST-NEXT: vpsubq %ymm7, %ymm3, %ymm3
1125 ; AVX2-FAST-NEXT: vpsubq %ymm6, %ymm2, %ymm2
1126 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
1127 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
1128 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
1129 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1130 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
1131 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1132 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1133 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
1134 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
1135 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
1136 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
1137 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1138 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1139 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1140 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
1141 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1142 ; AVX2-FAST-NEXT: vzeroupper
1143 ; AVX2-FAST-NEXT: retq
1197 ; AVX2-LABEL: trunc_sub_v16i32_v16i8:
1198 ; AVX2: # %bb.0:
1199 ; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
1200 ; AVX2-NEXT: vpsubd %ymm3, %ymm1, %ymm1
1201 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
1202 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
1203 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1204 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
1205 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
1206 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
1207 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1208 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
1209 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1210 ; AVX2-NEXT: vzeroupper
1211 ; AVX2-NEXT: retq
1248 ; AVX2-LABEL: trunc_sub_v16i16_v16i8:
1249 ; AVX2: # %bb.0:
1250 ; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0
1251 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1252 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1253 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1254 ; AVX2-NEXT: vzeroupper
1255 ; AVX2-NEXT: retq
1321 ; AVX2-SLOW-LABEL: trunc_sub_const_v4i64_v4i32:
1322 ; AVX2-SLOW: # %bb.0:
1323 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
1324 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
1325 ; AVX2-SLOW-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
1326 ; AVX2-SLOW-NEXT: vzeroupper
1327 ; AVX2-SLOW-NEXT: retq
1329 ; AVX2-FAST-LABEL: trunc_sub_const_v4i64_v4i32:
1330 ; AVX2-FAST: # %bb.0:
1331 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
1332 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
1333 ; AVX2-FAST-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
1334 ; AVX2-FAST-NEXT: vzeroupper
1335 ; AVX2-FAST-NEXT: retq
1380 ; AVX2-SLOW-LABEL: trunc_sub_const_v8i64_v8i16:
1381 ; AVX2-SLOW: # %bb.0:
1382 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
1383 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1384 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
1385 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
1386 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1387 ; AVX2-SLOW-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1388 ; AVX2-SLOW-NEXT: vzeroupper
1389 ; AVX2-SLOW-NEXT: retq
1391 ; AVX2-FAST-LABEL: trunc_sub_const_v8i64_v8i16:
1392 ; AVX2-FAST: # %bb.0:
1393 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1394 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
1395 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
1396 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1397 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
1398 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1399 ; AVX2-FAST-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1400 ; AVX2-FAST-NEXT: vzeroupper
1401 ; AVX2-FAST-NEXT: retq
1436 ; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
1437 ; AVX2: # %bb.0:
1438 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
1439 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1440 ; AVX2-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
1441 ; AVX2-NEXT: vzeroupper
1442 ; AVX2-NEXT: retq
1500 ; AVX2-SLOW-LABEL: trunc_sub_const_v16i64_v16i8:
1501 ; AVX2-SLOW: # %bb.0:
1502 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
1503 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1504 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
1505 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
1506 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1507 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1508 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
1509 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
1510 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
1511 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1512 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
1513 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1514 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1515 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
1516 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1517 ; AVX2-SLOW-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1518 ; AVX2-SLOW-NEXT: vzeroupper
1519 ; AVX2-SLOW-NEXT: retq
1521 ; AVX2-FAST-LABEL: trunc_sub_const_v16i64_v16i8:
1522 ; AVX2-FAST: # %bb.0:
1523 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
1524 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
1525 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
1526 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1527 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
1528 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1529 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1530 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
1531 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
1532 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
1533 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
1534 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1535 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1536 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1537 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
1538 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
1539 ; AVX2-FAST-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1540 ; AVX2-FAST-NEXT: vzeroupper
1541 ; AVX2-FAST-NEXT: retq
1584 ; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
1585 ; AVX2: # %bb.0:
1586 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
1587 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
1588 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
1589 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
1590 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
1591 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
1592 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1593 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
1594 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1595 ; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1596 ; AVX2-NEXT: vzeroupper
1597 ; AVX2-NEXT: retq
1629 ; AVX2-LABEL: trunc_sub_const_v16i16_v16i8:
1630 ; AVX2: # %bb.0:
1631 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1632 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1633 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
1634 ; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
1635 ; AVX2-NEXT: vzeroupper
1636 ; AVX2-NEXT: retq
1723 ; AVX2-SLOW-LABEL: trunc_mul_v4i64_v4i32:
1724 ; AVX2-SLOW: # %bb.0:
1725 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
1726 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1727 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
1728 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1729 ; AVX2-SLOW-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1730 ; AVX2-SLOW-NEXT: vzeroupper
1731 ; AVX2-SLOW-NEXT: retq
1733 ; AVX2-FAST-LABEL: trunc_mul_v4i64_v4i32:
1734 ; AVX2-FAST: # %bb.0:
1735 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1736 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
1737 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
1738 ; AVX2-FAST-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1739 ; AVX2-FAST-NEXT: vzeroupper
1740 ; AVX2-FAST-NEXT: retq
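trunc_mul_v4i64_v4i32 (checked just above) presumably multiplies two <4 x i64> values and truncates; a sketch, assuming the same shape as the add tests:

define <4 x i32> @trunc_mul_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) {
  ; wide multiply followed by truncation of each lane to i32
  %1 = mul <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

Because the low 32 bits of an i64 product depend only on the low 32 bits of each operand, both AVX2 lowerings truncate the operands first and multiply with a single 128-bit vpmulld instead of performing a 64-bit multiply.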
1825 ; AVX2-SLOW-LABEL: trunc_mul_v8i64_v8i16:
1826 ; AVX2-SLOW: # %bb.0:
1827 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
1828 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1829 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
1830 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
1831 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1832 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1833 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3],ymm1[2,3]
1834 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1835 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm4[0,2],ymm0[4,6],ymm4[4,6]
1836 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1837 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1838 ; AVX2-SLOW-NEXT: vpmullw %xmm2, %xmm0, %xmm0
1839 ; AVX2-SLOW-NEXT: vzeroupper
1840 ; AVX2-SLOW-NEXT: retq
1842 ; AVX2-FAST-LABEL: trunc_mul_v8i64_v8i16:
1843 ; AVX2-FAST: # %bb.0:
1844 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
1845 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
1846 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
1847 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
1848 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
1849 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
1850 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
1851 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
1852 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
1853 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1854 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
1855 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1856 ; AVX2-FAST-NEXT: vpmullw %xmm2, %xmm0, %xmm0
1857 ; AVX2-FAST-NEXT: vzeroupper
1858 ; AVX2-FAST-NEXT: retq
1924 ; AVX2-LABEL: trunc_mul_v8i32_v8i16:
1925 ; AVX2: # %bb.0:
1926 ; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
1927 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
1928 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
1929 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1930 ; AVX2-NEXT: vzeroupper
1931 ; AVX2-NEXT: retq
2011 ; AVX2-SLOW-LABEL: trunc_mul_v16i64_v16i8:
2012 ; AVX2-SLOW: # %bb.0:
2013 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm7, %xmm8
2014 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm8 = xmm7[0,2],xmm8[0,2]
2015 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm3, %xmm7
2016 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
2017 ; AVX2-SLOW-NEXT: vpmulld %xmm8, %xmm3, %xmm3
2018 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm6, %xmm7
2019 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
2020 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm2, %xmm7
2021 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm7[0,2]
2022 ; AVX2-SLOW-NEXT: vpmulld %xmm6, %xmm2, %xmm2
2023 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2024 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
2025 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2026 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2027 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
2028 ; AVX2-SLOW-NEXT: vpand %xmm6, %xmm2, %xmm2
2029 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm5, %xmm7
2030 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm5 = xmm5[0,2],xmm7[0,2]
2031 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm7
2032 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm7[0,2]
2033 ; AVX2-SLOW-NEXT: vpmulld %xmm5, %xmm1, %xmm1
2034 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm4, %xmm5
2035 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,2]
2036 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm5
2037 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
2038 ; AVX2-SLOW-NEXT: vpmulld %xmm4, %xmm0, %xmm0
2039 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2040 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2041 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2042 ; AVX2-SLOW-NEXT: vpand %xmm6, %xmm0, %xmm0
2043 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2044 ; AVX2-SLOW-NEXT: vzeroupper
2045 ; AVX2-SLOW-NEXT: retq
2047 ; AVX2-FAST-LABEL: trunc_mul_v16i64_v16i8:
2048 ; AVX2-FAST: # %bb.0:
2049 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm8 = [0,2,4,6,4,6,6,7]
2050 ; AVX2-FAST-NEXT: vpermd %ymm7, %ymm8, %ymm7
2051 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm8, %ymm3
2052 ; AVX2-FAST-NEXT: vpmulld %xmm7, %xmm3, %xmm3
2053 ; AVX2-FAST-NEXT: vpermd %ymm6, %ymm8, %ymm6
2054 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm8, %ymm2
2055 ; AVX2-FAST-NEXT: vpmulld %xmm6, %xmm2, %xmm2
2056 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2057 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
2058 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2059 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2060 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
2061 ; AVX2-FAST-NEXT: vpand %xmm6, %xmm2, %xmm2
2062 ; AVX2-FAST-NEXT: vpermd %ymm5, %ymm8, %ymm5
2063 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm8, %ymm1
2064 ; AVX2-FAST-NEXT: vpmulld %xmm5, %xmm1, %xmm1
2065 ; AVX2-FAST-NEXT: vpermd %ymm4, %ymm8, %ymm4
2066 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm8, %ymm0
2067 ; AVX2-FAST-NEXT: vpmulld %xmm4, %xmm0, %xmm0
2068 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2069 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2070 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2071 ; AVX2-FAST-NEXT: vpand %xmm6, %xmm0, %xmm0
2072 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2073 ; AVX2-FAST-NEXT: vzeroupper
2074 ; AVX2-FAST-NEXT: retq
2172 ; AVX2-LABEL: trunc_mul_v16i32_v16i8:
2173 ; AVX2: # %bb.0:
2174 ; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0
2175 ; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm1
2176 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
2177 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2178 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2179 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
2180 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
2181 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
2182 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2183 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
2184 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2185 ; AVX2-NEXT: vzeroupper
2186 ; AVX2-NEXT: retq
2223 ; AVX2-LABEL: trunc_mul_v16i16_v16i8:
2224 ; AVX2: # %bb.0:
2225 ; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
2226 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2227 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2228 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2229 ; AVX2-NEXT: vzeroupper
2230 ; AVX2-NEXT: retq
2285 ; AVX2-LABEL: trunc_mul_v8i32_v8i16_zext_8i8:
2286 ; AVX2: # %bb.0:
2287 ; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[…
2288 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
2289 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2290 ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
2291 ; AVX2-NEXT: vzeroupper
2292 ; AVX2-NEXT: retq
2331 ; AVX2-SLOW-LABEL: trunc_mul_const_v4i64_v4i32:
2332 ; AVX2-SLOW: # %bb.0:
2333 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
2334 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2335 ; AVX2-SLOW-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2336 ; AVX2-SLOW-NEXT: vzeroupper
2337 ; AVX2-SLOW-NEXT: retq
2339 ; AVX2-FAST-LABEL: trunc_mul_const_v4i64_v4i32:
2340 ; AVX2-FAST: # %bb.0:
2341 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
2342 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
2343 ; AVX2-FAST-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2344 ; AVX2-FAST-NEXT: vzeroupper
2345 ; AVX2-FAST-NEXT: retq
2390 ; AVX2-SLOW-LABEL: trunc_mul_const_v8i64_v8i16:
2391 ; AVX2-SLOW: # %bb.0:
2392 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
2393 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2394 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
2395 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
2396 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2397 ; AVX2-SLOW-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
2398 ; AVX2-SLOW-NEXT: vzeroupper
2399 ; AVX2-SLOW-NEXT: retq
2401 ; AVX2-FAST-LABEL: trunc_mul_const_v8i64_v8i16:
2402 ; AVX2-FAST: # %bb.0:
2403 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
2404 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
2405 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
2406 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2407 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
2408 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2409 ; AVX2-FAST-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
2410 ; AVX2-FAST-NEXT: vzeroupper
2411 ; AVX2-FAST-NEXT: retq
2446 ; AVX2-LABEL: trunc_mul_const_v8i32_v8i16:
2447 ; AVX2: # %bb.0:
2448 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
2449 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2450 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
2451 ; AVX2-NEXT: vzeroupper
2452 ; AVX2-NEXT: retq
2527 ; AVX2-SLOW-LABEL: trunc_mul_const_v16i64_v16i8:
2528 ; AVX2-SLOW: # %bb.0:
2529 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm2, %xmm4
2530 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm4[0,2]
2531 ; AVX2-SLOW-NEXT: vpmulld {{.*}}(%rip), %xmm2, %xmm2
2532 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm3, %xmm4
2533 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,2],xmm4[0,2]
2534 ; AVX2-SLOW-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3
2535 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2536 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
2537 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2538 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2539 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
2540 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
2541 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm5
2542 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
2543 ; AVX2-SLOW-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2544 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm5
2545 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2]
2546 ; AVX2-SLOW-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2547 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2548 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2549 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2550 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
2551 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2552 ; AVX2-SLOW-NEXT: vzeroupper
2553 ; AVX2-SLOW-NEXT: retq
2555 ; AVX2-FAST-LABEL: trunc_mul_const_v16i64_v16i8:
2556 ; AVX2-FAST: # %bb.0:
2557 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
2558 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
2559 ; AVX2-FAST-NEXT: vpmulld {{.*}}(%rip), %xmm2, %xmm2
2560 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
2561 ; AVX2-FAST-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3
2562 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2563 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
2564 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2565 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2566 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
2567 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
2568 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
2569 ; AVX2-FAST-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2570 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
2571 ; AVX2-FAST-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2572 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2573 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
2574 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2575 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
2576 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
2577 ; AVX2-FAST-NEXT: vzeroupper
2578 ; AVX2-FAST-NEXT: retq
2678 ; AVX2-LABEL: trunc_mul_const_v16i32_v16i8:
2679 ; AVX2: # %bb.0:
2680 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
2681 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2682 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
2683 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
2684 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
2685 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
2686 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
2687 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2688 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
2689 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
2690 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2691 ; AVX2-NEXT: vzeroupper
2692 ; AVX2-NEXT: retq
2728 ; AVX2-LABEL: trunc_mul_const_v16i16_v16i8:
2729 ; AVX2: # %bb.0:
2730 ; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
2731 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2732 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2733 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2734 ; AVX2-NEXT: vzeroupper
2735 ; AVX2-NEXT: retq
2785 ; AVX2-SLOW-LABEL: trunc_and_v4i64_v4i32:
2786 ; AVX2-SLOW: # %bb.0:
2787 ; AVX2-SLOW-NEXT: vandps %ymm1, %ymm0, %ymm0
2788 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
2789 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
2790 ; AVX2-SLOW-NEXT: vzeroupper
2791 ; AVX2-SLOW-NEXT: retq
2793 ; AVX2-FAST-LABEL: trunc_and_v4i64_v4i32:
2794 ; AVX2-FAST: # %bb.0:
2795 ; AVX2-FAST-NEXT: vandps %ymm1, %ymm0, %ymm0
2796 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
2797 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
2798 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2799 ; AVX2-FAST-NEXT: vzeroupper
2800 ; AVX2-FAST-NEXT: retq
2849 ; AVX2-SLOW-LABEL: trunc_and_v8i64_v8i16:
2850 ; AVX2-SLOW: # %bb.0:
2851 ; AVX2-SLOW-NEXT: vandps %ymm3, %ymm1, %ymm1
2852 ; AVX2-SLOW-NEXT: vandps %ymm2, %ymm0, %ymm0
2853 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
2854 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2855 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
2856 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
2857 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2858 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2859 ; AVX2-SLOW-NEXT: vzeroupper
2860 ; AVX2-SLOW-NEXT: retq
2862 ; AVX2-FAST-LABEL: trunc_and_v8i64_v8i16:
2863 ; AVX2-FAST: # %bb.0:
2864 ; AVX2-FAST-NEXT: vpand %ymm3, %ymm1, %ymm1
2865 ; AVX2-FAST-NEXT: vpand %ymm2, %ymm0, %ymm0
2866 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
2867 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
2868 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
2869 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2870 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
2871 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2872 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2873 ; AVX2-FAST-NEXT: vzeroupper
2874 ; AVX2-FAST-NEXT: retq
2910 ; AVX2-LABEL: trunc_and_v8i32_v8i16:
2911 ; AVX2: # %bb.0:
2912 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
2913 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
2914 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2915 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2916 ; AVX2-NEXT: vzeroupper
2917 ; AVX2-NEXT: retq
2985 ; AVX2-SLOW-LABEL: trunc_and_v16i64_v16i8:
2986 ; AVX2-SLOW: # %bb.0:
2987 ; AVX2-SLOW-NEXT: vandps %ymm5, %ymm1, %ymm1
2988 ; AVX2-SLOW-NEXT: vandps %ymm4, %ymm0, %ymm0
2989 ; AVX2-SLOW-NEXT: vandps %ymm7, %ymm3, %ymm3
2990 ; AVX2-SLOW-NEXT: vandps %ymm6, %ymm2, %ymm2
2991 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
2992 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
2993 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
2994 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
2995 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
2996 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
2997 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
2998 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
2999 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
3000 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3001 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
3002 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3003 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3004 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
3005 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3006 ; AVX2-SLOW-NEXT: vzeroupper
3007 ; AVX2-SLOW-NEXT: retq
3009 ; AVX2-FAST-LABEL: trunc_and_v16i64_v16i8:
3010 ; AVX2-FAST: # %bb.0:
3011 ; AVX2-FAST-NEXT: vpand %ymm5, %ymm1, %ymm1
3012 ; AVX2-FAST-NEXT: vpand %ymm4, %ymm0, %ymm0
3013 ; AVX2-FAST-NEXT: vpand %ymm7, %ymm3, %ymm3
3014 ; AVX2-FAST-NEXT: vpand %ymm6, %ymm2, %ymm2
3015 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
3016 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
3017 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
3018 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
3019 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
3020 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3021 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3022 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
3023 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
3024 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
3025 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
3026 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3027 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3028 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3029 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
3030 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3031 ; AVX2-FAST-NEXT: vzeroupper
3032 ; AVX2-FAST-NEXT: retq
3080 ; AVX2-LABEL: trunc_and_v16i32_v16i8:
3081 ; AVX2: # %bb.0:
3082 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
3083 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
3084 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
3085 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
3086 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3087 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
3088 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
3089 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
3090 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3091 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
3092 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3093 ; AVX2-NEXT: vzeroupper
3094 ; AVX2-NEXT: retq
3127 ; AVX2-LABEL: trunc_and_v16i16_v16i8:
3128 ; AVX2: # %bb.0:
3129 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
3130 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3131 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3132 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3133 ; AVX2-NEXT: vzeroupper
3134 ; AVX2-NEXT: retq
3183 ; AVX2-SLOW-LABEL: trunc_and_const_v4i64_v4i32:
3184 ; AVX2-SLOW: # %bb.0:
3185 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
3186 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3187 ; AVX2-SLOW-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
3188 ; AVX2-SLOW-NEXT: vzeroupper
3189 ; AVX2-SLOW-NEXT: retq
3191 ; AVX2-FAST-LABEL: trunc_and_const_v4i64_v4i32:
3192 ; AVX2-FAST: # %bb.0:
3193 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
3194 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
3195 ; AVX2-FAST-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
3196 ; AVX2-FAST-NEXT: vzeroupper
3197 ; AVX2-FAST-NEXT: retq
3242 ; AVX2-SLOW-LABEL: trunc_and_const_v8i64_v8i16:
3243 ; AVX2-SLOW: # %bb.0:
3244 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
3245 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3246 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
3247 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
3248 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3249 ; AVX2-SLOW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3250 ; AVX2-SLOW-NEXT: vzeroupper
3251 ; AVX2-SLOW-NEXT: retq
3253 ; AVX2-FAST-LABEL: trunc_and_const_v8i64_v8i16:
3254 ; AVX2-FAST: # %bb.0:
3255 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
3256 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
3257 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
3258 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3259 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
3260 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3261 ; AVX2-FAST-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3262 ; AVX2-FAST-NEXT: vzeroupper
3263 ; AVX2-FAST-NEXT: retq
3298 ; AVX2-LABEL: trunc_and_const_v8i32_v8i16:
3299 ; AVX2: # %bb.0:
3300 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
3301 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3302 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3303 ; AVX2-NEXT: vzeroupper
3304 ; AVX2-NEXT: retq
3362 ; AVX2-SLOW-LABEL: trunc_and_const_v16i64_v16i8:
3363 ; AVX2-SLOW: # %bb.0:
3364 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
3365 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
3366 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
3367 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
3368 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3369 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3370 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
3371 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
3372 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
3373 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3374 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
3375 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3376 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3377 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
3378 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3379 ; AVX2-SLOW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3380 ; AVX2-SLOW-NEXT: vzeroupper
3381 ; AVX2-SLOW-NEXT: retq
3383 ; AVX2-FAST-LABEL: trunc_and_const_v16i64_v16i8:
3384 ; AVX2-FAST: # %bb.0:
3385 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
3386 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
3387 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
3388 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
3389 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
3390 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3391 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3392 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
3393 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
3394 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
3395 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
3396 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3397 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3398 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3399 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
3400 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3401 ; AVX2-FAST-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3402 ; AVX2-FAST-NEXT: vzeroupper
3403 ; AVX2-FAST-NEXT: retq
3446 ; AVX2-LABEL: trunc_and_const_v16i32_v16i8:
3447 ; AVX2: # %bb.0:
3448 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
3449 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
3450 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3451 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
3452 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
3453 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
3454 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3455 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
3456 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3457 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3458 ; AVX2-NEXT: vzeroupper
3459 ; AVX2-NEXT: retq
3491 ; AVX2-LABEL: trunc_and_const_v16i16_v16i8:
3492 ; AVX2: # %bb.0:
3493 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3494 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3495 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3496 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
3497 ; AVX2-NEXT: vzeroupper
3498 ; AVX2-NEXT: retq
3548 ; AVX2-SLOW-LABEL: trunc_xor_v4i64_v4i32:
3549 ; AVX2-SLOW: # %bb.0:
3550 ; AVX2-SLOW-NEXT: vxorps %ymm1, %ymm0, %ymm0
3551 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
3552 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3553 ; AVX2-SLOW-NEXT: vzeroupper
3554 ; AVX2-SLOW-NEXT: retq
3556 ; AVX2-FAST-LABEL: trunc_xor_v4i64_v4i32:
3557 ; AVX2-FAST: # %bb.0:
3558 ; AVX2-FAST-NEXT: vxorps %ymm1, %ymm0, %ymm0
3559 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
3560 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
3561 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3562 ; AVX2-FAST-NEXT: vzeroupper
3563 ; AVX2-FAST-NEXT: retq
3612 ; AVX2-SLOW-LABEL: trunc_xor_v8i64_v8i16:
3613 ; AVX2-SLOW: # %bb.0:
3614 ; AVX2-SLOW-NEXT: vxorps %ymm3, %ymm1, %ymm1
3615 ; AVX2-SLOW-NEXT: vxorps %ymm2, %ymm0, %ymm0
3616 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
3617 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3618 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
3619 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
3620 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3621 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3622 ; AVX2-SLOW-NEXT: vzeroupper
3623 ; AVX2-SLOW-NEXT: retq
3625 ; AVX2-FAST-LABEL: trunc_xor_v8i64_v8i16:
3626 ; AVX2-FAST: # %bb.0:
3627 ; AVX2-FAST-NEXT: vpxor %ymm3, %ymm1, %ymm1
3628 ; AVX2-FAST-NEXT: vpxor %ymm2, %ymm0, %ymm0
3629 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
3630 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
3631 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
3632 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3633 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
3634 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3635 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3636 ; AVX2-FAST-NEXT: vzeroupper
3637 ; AVX2-FAST-NEXT: retq
3673 ; AVX2-LABEL: trunc_xor_v8i32_v8i16:
3674 ; AVX2: # %bb.0:
3675 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
3676 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
3677 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3678 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
3679 ; AVX2-NEXT: vzeroupper
3680 ; AVX2-NEXT: retq
3748 ; AVX2-SLOW-LABEL: trunc_xor_v16i64_v16i8:
3749 ; AVX2-SLOW: # %bb.0:
3750 ; AVX2-SLOW-NEXT: vxorps %ymm5, %ymm1, %ymm1
3751 ; AVX2-SLOW-NEXT: vxorps %ymm4, %ymm0, %ymm0
3752 ; AVX2-SLOW-NEXT: vxorps %ymm7, %ymm3, %ymm3
3753 ; AVX2-SLOW-NEXT: vxorps %ymm6, %ymm2, %ymm2
3754 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
3755 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
3756 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
3757 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
3758 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3759 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3760 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
3761 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
3762 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
3763 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3764 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
3765 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3766 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3767 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
3768 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3769 ; AVX2-SLOW-NEXT: vzeroupper
3770 ; AVX2-SLOW-NEXT: retq
3772 ; AVX2-FAST-LABEL: trunc_xor_v16i64_v16i8:
3773 ; AVX2-FAST: # %bb.0:
3774 ; AVX2-FAST-NEXT: vpxor %ymm5, %ymm1, %ymm1
3775 ; AVX2-FAST-NEXT: vpxor %ymm4, %ymm0, %ymm0
3776 ; AVX2-FAST-NEXT: vpxor %ymm7, %ymm3, %ymm3
3777 ; AVX2-FAST-NEXT: vpxor %ymm6, %ymm2, %ymm2
3778 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
3779 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
3780 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
3781 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
3782 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
3783 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
3784 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
3785 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
3786 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
3787 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
3788 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
3789 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3790 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
3791 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3792 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
3793 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
3794 ; AVX2-FAST-NEXT: vzeroupper
3795 ; AVX2-FAST-NEXT: retq
3843 ; AVX2-LABEL: trunc_xor_v16i32_v16i8:
3844 ; AVX2: # %bb.0:
3845 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
3846 ; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
3847 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
3848 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
3849 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
3850 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
3851 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
3852 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
3853 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
3854 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
3855 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3856 ; AVX2-NEXT: vzeroupper
3857 ; AVX2-NEXT: retq
3890 ; AVX2-LABEL: trunc_xor_v16i16_v16i8:
3891 ; AVX2: # %bb.0:
3892 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
3893 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
3894 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3895 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
3896 ; AVX2-NEXT: vzeroupper
3897 ; AVX2-NEXT: retq
3946 ; AVX2-SLOW-LABEL: trunc_xor_const_v4i64_v4i32:
3947 ; AVX2-SLOW: # %bb.0:
3948 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
3949 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
3950 ; AVX2-SLOW-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
3951 ; AVX2-SLOW-NEXT: vzeroupper
3952 ; AVX2-SLOW-NEXT: retq
3954 ; AVX2-FAST-LABEL: trunc_xor_const_v4i64_v4i32:
3955 ; AVX2-FAST: # %bb.0:
3956 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
3957 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
3958 ; AVX2-FAST-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
3959 ; AVX2-FAST-NEXT: vzeroupper
3960 ; AVX2-FAST-NEXT: retq
4005 ; AVX2-SLOW-LABEL: trunc_xor_const_v8i64_v8i16:
4006 ; AVX2-SLOW: # %bb.0:
4007 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
4008 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4009 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
4010 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
4011 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4012 ; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4013 ; AVX2-SLOW-NEXT: vzeroupper
4014 ; AVX2-SLOW-NEXT: retq
4016 ; AVX2-FAST-LABEL: trunc_xor_const_v8i64_v8i16:
4017 ; AVX2-FAST: # %bb.0:
4018 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
4019 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
4020 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
4021 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4022 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
4023 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4024 ; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4025 ; AVX2-FAST-NEXT: vzeroupper
4026 ; AVX2-FAST-NEXT: retq
4061 ; AVX2-LABEL: trunc_xor_const_v8i32_v8i16:
4062 ; AVX2: # %bb.0:
4063 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
4064 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4065 ; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4066 ; AVX2-NEXT: vzeroupper
4067 ; AVX2-NEXT: retq
4125 ; AVX2-SLOW-LABEL: trunc_xor_const_v16i64_v16i8:
4126 ; AVX2-SLOW: # %bb.0:
4127 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
4128 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
4129 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
4130 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
4131 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4132 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4133 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
4134 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
4135 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
4136 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4137 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
4138 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4139 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4140 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
4141 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4142 ; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4143 ; AVX2-SLOW-NEXT: vzeroupper
4144 ; AVX2-SLOW-NEXT: retq
4146 ; AVX2-FAST-LABEL: trunc_xor_const_v16i64_v16i8:
4147 ; AVX2-FAST: # %bb.0:
4148 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
4149 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
4150 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
4151 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
4152 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
4153 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4154 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4155 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
4156 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
4157 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
4158 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
4159 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4160 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4161 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4162 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
4163 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4164 ; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4165 ; AVX2-FAST-NEXT: vzeroupper
4166 ; AVX2-FAST-NEXT: retq
4209 ; AVX2-LABEL: trunc_xor_const_v16i32_v16i8:
4210 ; AVX2: # %bb.0:
4211 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
4212 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
4213 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4214 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4215 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
4216 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
4217 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4218 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
4219 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4220 ; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4221 ; AVX2-NEXT: vzeroupper
4222 ; AVX2-NEXT: retq
4254 ; AVX2-LABEL: trunc_xor_const_v16i16_v16i8:
4255 ; AVX2: # %bb.0:
4256 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
4257 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
4258 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4259 ; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
4260 ; AVX2-NEXT: vzeroupper
4261 ; AVX2-NEXT: retq
4311 ; AVX2-SLOW-LABEL: trunc_or_v4i64_v4i32:
4312 ; AVX2-SLOW: # %bb.0:
4313 ; AVX2-SLOW-NEXT: vorps %ymm1, %ymm0, %ymm0
4314 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
4315 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
4316 ; AVX2-SLOW-NEXT: vzeroupper
4317 ; AVX2-SLOW-NEXT: retq
4319 ; AVX2-FAST-LABEL: trunc_or_v4i64_v4i32:
4320 ; AVX2-FAST: # %bb.0:
4321 ; AVX2-FAST-NEXT: vorps %ymm1, %ymm0, %ymm0
4322 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
4323 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
4324 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
4325 ; AVX2-FAST-NEXT: vzeroupper
4326 ; AVX2-FAST-NEXT: retq
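The or forms mirror the xor ones: the or happens at full ymm width, then the v4i64 to v4i32 truncation is either a vextractf128 + vshufps pair (AVX2-SLOW) or a single vpermps with the <0,2,4,6,u,u,u,u> index vector (AVX2-FAST). IR sketch with assumed argument names:

define <4 x i32> @trunc_or_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) {
  %1 = or <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}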
4375 ; AVX2-SLOW-LABEL: trunc_or_v8i64_v8i16:
4376 ; AVX2-SLOW: # %bb.0:
4377 ; AVX2-SLOW-NEXT: vorps %ymm3, %ymm1, %ymm1
4378 ; AVX2-SLOW-NEXT: vorps %ymm2, %ymm0, %ymm0
4379 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
4380 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4381 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
4382 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
4383 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4384 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
4385 ; AVX2-SLOW-NEXT: vzeroupper
4386 ; AVX2-SLOW-NEXT: retq
4388 ; AVX2-FAST-LABEL: trunc_or_v8i64_v8i16:
4389 ; AVX2-FAST: # %bb.0:
4390 ; AVX2-FAST-NEXT: vpor %ymm3, %ymm1, %ymm1
4391 ; AVX2-FAST-NEXT: vpor %ymm2, %ymm0, %ymm0
4392 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
4393 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
4394 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
4395 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4396 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
4397 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4398 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
4399 ; AVX2-FAST-NEXT: vzeroupper
4400 ; AVX2-FAST-NEXT: retq
4436 ; AVX2-LABEL: trunc_or_v8i32_v8i16:
4437 ; AVX2: # %bb.0:
4438 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
4439 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
4440 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4441 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
4442 ; AVX2-NEXT: vzeroupper
4443 ; AVX2-NEXT: retq
4511 ; AVX2-SLOW-LABEL: trunc_or_v16i64_v16i8:
4512 ; AVX2-SLOW: # %bb.0:
4513 ; AVX2-SLOW-NEXT: vorps %ymm5, %ymm1, %ymm1
4514 ; AVX2-SLOW-NEXT: vorps %ymm4, %ymm0, %ymm0
4515 ; AVX2-SLOW-NEXT: vorps %ymm7, %ymm3, %ymm3
4516 ; AVX2-SLOW-NEXT: vorps %ymm6, %ymm2, %ymm2
4517 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
4518 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
4519 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
4520 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
4521 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4522 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4523 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
4524 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
4525 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
4526 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4527 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
4528 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4529 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4530 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
4531 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4532 ; AVX2-SLOW-NEXT: vzeroupper
4533 ; AVX2-SLOW-NEXT: retq
4535 ; AVX2-FAST-LABEL: trunc_or_v16i64_v16i8:
4536 ; AVX2-FAST: # %bb.0:
4537 ; AVX2-FAST-NEXT: vpor %ymm5, %ymm1, %ymm1
4538 ; AVX2-FAST-NEXT: vpor %ymm4, %ymm0, %ymm0
4539 ; AVX2-FAST-NEXT: vpor %ymm7, %ymm3, %ymm3
4540 ; AVX2-FAST-NEXT: vpor %ymm6, %ymm2, %ymm2
4541 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
4542 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
4543 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
4544 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
4545 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
4546 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4547 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4548 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
4549 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
4550 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
4551 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
4552 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4553 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4554 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4555 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
4556 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4557 ; AVX2-FAST-NEXT: vzeroupper
4558 ; AVX2-FAST-NEXT: retq
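A <16 x i64> operand occupies four ymm registers, so the two sources arrive in ymm0-ymm3 and ymm4-ymm7. Both blocks above or the four register pairs, narrow each pair of ymm registers down to eight word-sized lanes, mask those to bytes, and vpackuswb the two halves into the final xmm result. The IR being lowered is still just two instructions (names assumed):

define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) {
  %1 = or <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}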
4606 ; AVX2-LABEL: trunc_or_v16i32_v16i8:
4607 ; AVX2: # %bb.0:
4608 ; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
4609 ; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
4610 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
4611 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
4612 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4613 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4614 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
4615 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
4616 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4617 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
4618 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4619 ; AVX2-NEXT: vzeroupper
4620 ; AVX2-NEXT: retq
4653 ; AVX2-LABEL: trunc_or_v16i16_v16i8:
4654 ; AVX2: # %bb.0:
4655 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
4656 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
4657 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
4658 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4659 ; AVX2-NEXT: vzeroupper
4660 ; AVX2-NEXT: retq
4709 ; AVX2-SLOW-LABEL: trunc_or_const_v4i64_v4i32:
4710 ; AVX2-SLOW: # %bb.0:
4711 ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
4712 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
4713 ; AVX2-SLOW-NEXT: vorps {{.*}}(%rip), %xmm0, %xmm0
4714 ; AVX2-SLOW-NEXT: vzeroupper
4715 ; AVX2-SLOW-NEXT: retq
4717 ; AVX2-FAST-LABEL: trunc_or_const_v4i64_v4i32:
4718 ; AVX2-FAST: # %bb.0:
4719 ; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
4720 ; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
4721 ; AVX2-FAST-NEXT: vorps {{.*}}(%rip), %xmm0, %xmm0
4722 ; AVX2-FAST-NEXT: vzeroupper
4723 ; AVX2-FAST-NEXT: retq
4768 ; AVX2-SLOW-LABEL: trunc_or_const_v8i64_v8i16:
4769 ; AVX2-SLOW: # %bb.0:
4770 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
4771 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4772 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
4773 ; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
4774 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4775 ; AVX2-SLOW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
4776 ; AVX2-SLOW-NEXT: vzeroupper
4777 ; AVX2-SLOW-NEXT: retq
4779 ; AVX2-FAST-LABEL: trunc_or_const_v8i64_v8i16:
4780 ; AVX2-FAST: # %bb.0:
4781 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
4782 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
4783 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
4784 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4785 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,…
4786 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4787 ; AVX2-FAST-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
4788 ; AVX2-FAST-NEXT: vzeroupper
4789 ; AVX2-FAST-NEXT: retq
4824 ; AVX2-LABEL: trunc_or_const_v8i32_v8i16:
4825 ; AVX2: # %bb.0:
4826 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28…
4827 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4828 ; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
4829 ; AVX2-NEXT: vzeroupper
4830 ; AVX2-NEXT: retq
4888 ; AVX2-SLOW-LABEL: trunc_or_const_v16i64_v16i8:
4889 ; AVX2-SLOW: # %bb.0:
4890 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],ymm3[2,3]
4891 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
4892 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm2 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
4893 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
4894 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4895 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4896 ; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
4897 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm2, %xmm2
4898 ; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[2,3],ymm1[2,3]
4899 ; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4900 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm5[0,2],ymm0[4,6],ymm5[4,6]
4901 ; AVX2-SLOW-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4902 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4903 ; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
4904 ; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4905 ; AVX2-SLOW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
4906 ; AVX2-SLOW-NEXT: vzeroupper
4907 ; AVX2-SLOW-NEXT: retq
4909 ; AVX2-FAST-LABEL: trunc_or_const_v16i64_v16i8:
4910 ; AVX2-FAST: # %bb.0:
4911 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
4912 ; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
4913 ; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
4914 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
4915 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,2…
4916 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm2, %ymm2
4917 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
4918 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
4919 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm2, %xmm2
4920 ; AVX2-FAST-NEXT: vpermd %ymm0, %ymm4, %ymm0
4921 ; AVX2-FAST-NEXT: vpermd %ymm1, %ymm4, %ymm1
4922 ; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
4923 ; AVX2-FAST-NEXT: vpshufb %ymm3, %ymm0, %ymm0
4924 ; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4925 ; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
4926 ; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4927 ; AVX2-FAST-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
4928 ; AVX2-FAST-NEXT: vzeroupper
4929 ; AVX2-FAST-NEXT: retq
4972 ; AVX2-LABEL: trunc_or_const_v16i32_v16i8:
4973 ; AVX2: # %bb.0:
4974 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,…
4975 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
4976 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
4977 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4978 ; AVX2-NEXT: vpand %xmm3, %xmm1, %xmm1
4979 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
4980 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
4981 ; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
4982 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4983 ; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
4984 ; AVX2-NEXT: vzeroupper
4985 ; AVX2-NEXT: retq
5017 ; AVX2-LABEL: trunc_or_const_v16i16_v16i8:
5018 ; AVX2: # %bb.0:
5019 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
5020 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5021 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
5022 ; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
5023 ; AVX2-NEXT: vzeroupper
5024 ; AVX2-NEXT: retq
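The final block combines both patterns seen above: the v16i16 value is truncated with the vpand + vpackuswb sequence, and the constant or is folded to a single xmm vpor against a RIP-relative constant afterwards. IR sketch (constant values chosen arbitrarily):

define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) {
  %1 = or <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7,
                           i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}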