Lines Matching full:fast

3 …llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops    | FileCheck %s --check-prefixes=SSE3,S…
5 … < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-F…
7 …< %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FA…
9 …s -mtriple=x86_64-unknown -mattr=+avx512vl,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,…
22 ; SSE3-FAST-LABEL: extract_extract01_v4i32_add_i32:
23 ; SSE3-FAST: # %bb.0:
24 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
25 ; SSE3-FAST-NEXT: movd %xmm0, %eax
26 ; SSE3-FAST-NEXT: retq
35 ; AVX-FAST-LABEL: extract_extract01_v4i32_add_i32:
36 ; AVX-FAST: # %bb.0:
37 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
38 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
39 ; AVX-FAST-NEXT: retq
56 ; SSE3-FAST-LABEL: extract_extract23_v4i32_add_i32:
57 ; SSE3-FAST: # %bb.0:
58 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
59 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
60 ; SSE3-FAST-NEXT: movd %xmm0, %eax
61 ; SSE3-FAST-NEXT: retq
70 ; AVX-FAST-LABEL: extract_extract23_v4i32_add_i32:
71 ; AVX-FAST: # %bb.0:
72 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
73 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, %eax
74 ; AVX-FAST-NEXT: retq
90 ; SSE3-FAST-LABEL: extract_extract01_v4i32_add_i32_commute:
91 ; SSE3-FAST: # %bb.0:
92 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
93 ; SSE3-FAST-NEXT: movd %xmm0, %eax
94 ; SSE3-FAST-NEXT: retq
103 ; AVX-FAST-LABEL: extract_extract01_v4i32_add_i32_commute:
104 ; AVX-FAST: # %bb.0:
105 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
106 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
107 ; AVX-FAST-NEXT: retq
124 ; SSE3-FAST-LABEL: extract_extract23_v4i32_add_i32_commute:
125 ; SSE3-FAST: # %bb.0:
126 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
127 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
128 ; SSE3-FAST-NEXT: movd %xmm0, %eax
129 ; SSE3-FAST-NEXT: retq
138 ; AVX-FAST-LABEL: extract_extract23_v4i32_add_i32_commute:
139 ; AVX-FAST: # %bb.0:
140 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
141 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, %eax
142 ; AVX-FAST-NEXT: retq
158 ; SSE3-FAST-LABEL: extract_extract01_v8i16_add_i16:
159 ; SSE3-FAST: # %bb.0:
160 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
161 ; SSE3-FAST-NEXT: movd %xmm0, %eax
162 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
163 ; SSE3-FAST-NEXT: retq
173 ; AVX-FAST-LABEL: extract_extract01_v8i16_add_i16:
174 ; AVX-FAST: # %bb.0:
175 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
176 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
177 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
178 ; AVX-FAST-NEXT: retq
194 ; SSE3-FAST-LABEL: extract_extract45_v8i16_add_i16:
195 ; SSE3-FAST: # %bb.0:
196 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
197 ; SSE3-FAST-NEXT: pextrw $2, %xmm0, %eax
198 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
199 ; SSE3-FAST-NEXT: retq
209 ; AVX-FAST-LABEL: extract_extract45_v8i16_add_i16:
210 ; AVX-FAST: # %bb.0:
211 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
212 ; AVX-FAST-NEXT: vpextrw $2, %xmm0, %eax
213 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
214 ; AVX-FAST-NEXT: retq
230 ; SSE3-FAST-LABEL: extract_extract01_v8i16_add_i16_commute:
231 ; SSE3-FAST: # %bb.0:
232 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
233 ; SSE3-FAST-NEXT: movd %xmm0, %eax
234 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
235 ; SSE3-FAST-NEXT: retq
245 ; AVX-FAST-LABEL: extract_extract01_v8i16_add_i16_commute:
246 ; AVX-FAST: # %bb.0:
247 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
248 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
249 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
250 ; AVX-FAST-NEXT: retq
266 ; SSE3-FAST-LABEL: extract_extract45_v8i16_add_i16_commute:
267 ; SSE3-FAST: # %bb.0:
268 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
269 ; SSE3-FAST-NEXT: pextrw $2, %xmm0, %eax
270 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
271 ; SSE3-FAST-NEXT: retq
281 ; AVX-FAST-LABEL: extract_extract45_v8i16_add_i16_commute:
282 ; AVX-FAST: # %bb.0:
283 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
284 ; AVX-FAST-NEXT: vpextrw $2, %xmm0, %eax
285 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
286 ; AVX-FAST-NEXT: retq
302 ; SSE3-FAST-LABEL: extract_extract01_v4i32_sub_i32:
303 ; SSE3-FAST: # %bb.0:
304 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
305 ; SSE3-FAST-NEXT: movd %xmm0, %eax
306 ; SSE3-FAST-NEXT: retq
315 ; AVX-FAST-LABEL: extract_extract01_v4i32_sub_i32:
316 ; AVX-FAST: # %bb.0:
317 ; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
318 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
319 ; AVX-FAST-NEXT: retq
336 ; SSE3-FAST-LABEL: extract_extract23_v4i32_sub_i32:
337 ; SSE3-FAST: # %bb.0:
338 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
339 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
340 ; SSE3-FAST-NEXT: movd %xmm0, %eax
341 ; SSE3-FAST-NEXT: retq
350 ; AVX-FAST-LABEL: extract_extract23_v4i32_sub_i32:
351 ; AVX-FAST: # %bb.0:
352 ; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
353 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, %eax
354 ; AVX-FAST-NEXT: retq
413 ; SSE3-FAST-LABEL: extract_extract01_v8i16_sub_i16:
414 ; SSE3-FAST: # %bb.0:
415 ; SSE3-FAST-NEXT: phsubw %xmm0, %xmm0
416 ; SSE3-FAST-NEXT: movd %xmm0, %eax
417 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
418 ; SSE3-FAST-NEXT: retq
428 ; AVX-FAST-LABEL: extract_extract01_v8i16_sub_i16:
429 ; AVX-FAST: # %bb.0:
430 ; AVX-FAST-NEXT: vphsubw %xmm0, %xmm0, %xmm0
431 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
432 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
433 ; AVX-FAST-NEXT: retq
449 ; SSE3-FAST-LABEL: extract_extract23_v8i16_sub_i16:
450 ; SSE3-FAST: # %bb.0:
451 ; SSE3-FAST-NEXT: phsubw %xmm0, %xmm0
452 ; SSE3-FAST-NEXT: pextrw $1, %xmm0, %eax
453 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
454 ; SSE3-FAST-NEXT: retq
464 ; AVX-FAST-LABEL: extract_extract23_v8i16_sub_i16:
465 ; AVX-FAST: # %bb.0:
466 ; AVX-FAST-NEXT: vphsubw %xmm0, %xmm0, %xmm0
467 ; AVX-FAST-NEXT: vpextrw $1, %xmm0, %eax
468 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
469 ; AVX-FAST-NEXT: retq
531 ; SSE3-FAST-LABEL: extract_extract01_v8i32_add_i32:
532 ; SSE3-FAST: # %bb.0:
533 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
534 ; SSE3-FAST-NEXT: movd %xmm0, %eax
535 ; SSE3-FAST-NEXT: retq
545 ; AVX-FAST-LABEL: extract_extract01_v8i32_add_i32:
546 ; AVX-FAST: # %bb.0:
547 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
548 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
549 ; AVX-FAST-NEXT: vzeroupper
550 ; AVX-FAST-NEXT: retq
567 ; SSE3-FAST-LABEL: extract_extract23_v8i32_add_i32:
568 ; SSE3-FAST: # %bb.0:
569 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
570 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
571 ; SSE3-FAST-NEXT: movd %xmm0, %eax
572 ; SSE3-FAST-NEXT: retq
582 ; AVX-FAST-LABEL: extract_extract23_v8i32_add_i32:
583 ; AVX-FAST: # %bb.0:
584 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
585 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, %eax
586 ; AVX-FAST-NEXT: vzeroupper
587 ; AVX-FAST-NEXT: retq
604 ; SSE3-FAST-LABEL: extract_extract67_v8i32_add_i32:
605 ; SSE3-FAST: # %bb.0:
606 ; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
607 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
608 ; SSE3-FAST-NEXT: movd %xmm0, %eax
609 ; SSE3-FAST-NEXT: retq
620 ; AVX1-FAST-LABEL: extract_extract67_v8i32_add_i32:
621 ; AVX1-FAST: # %bb.0:
622 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
623 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
624 ; AVX1-FAST-NEXT: vpextrd $1, %xmm0, %eax
625 ; AVX1-FAST-NEXT: vzeroupper
626 ; AVX1-FAST-NEXT: retq
628 ; AVX2-FAST-LABEL: extract_extract67_v8i32_add_i32:
629 ; AVX2-FAST: # %bb.0:
630 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
631 ; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
632 ; AVX2-FAST-NEXT: vpextrd $1, %xmm0, %eax
633 ; AVX2-FAST-NEXT: vzeroupper
634 ; AVX2-FAST-NEXT: retq
636 ; AVX512-FAST-LABEL: extract_extract67_v8i32_add_i32:
637 ; AVX512-FAST: # %bb.0:
638 ; AVX512-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
639 ; AVX512-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
640 ; AVX512-FAST-NEXT: vpextrd $1, %xmm0, %eax
641 ; AVX512-FAST-NEXT: vzeroupper
642 ; AVX512-FAST-NEXT: retq
658 ; SSE3-FAST-LABEL: extract_extract01_v8i32_add_i32_commute:
659 ; SSE3-FAST: # %bb.0:
660 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
661 ; SSE3-FAST-NEXT: movd %xmm0, %eax
662 ; SSE3-FAST-NEXT: retq
672 ; AVX-FAST-LABEL: extract_extract01_v8i32_add_i32_commute:
673 ; AVX-FAST: # %bb.0:
674 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
675 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
676 ; AVX-FAST-NEXT: vzeroupper
677 ; AVX-FAST-NEXT: retq
694 ; SSE3-FAST-LABEL: extract_extract23_v8i32_add_i32_commute:
695 ; SSE3-FAST: # %bb.0:
696 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
697 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
698 ; SSE3-FAST-NEXT: movd %xmm0, %eax
699 ; SSE3-FAST-NEXT: retq
709 ; AVX-FAST-LABEL: extract_extract23_v8i32_add_i32_commute:
710 ; AVX-FAST: # %bb.0:
711 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
712 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, %eax
713 ; AVX-FAST-NEXT: vzeroupper
714 ; AVX-FAST-NEXT: retq
731 ; SSE3-FAST-LABEL: extract_extract67_v8i32_add_i32_commute:
732 ; SSE3-FAST: # %bb.0:
733 ; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
734 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
735 ; SSE3-FAST-NEXT: movd %xmm0, %eax
736 ; SSE3-FAST-NEXT: retq
747 ; AVX1-FAST-LABEL: extract_extract67_v8i32_add_i32_commute:
748 ; AVX1-FAST: # %bb.0:
749 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
750 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
751 ; AVX1-FAST-NEXT: vpextrd $1, %xmm0, %eax
752 ; AVX1-FAST-NEXT: vzeroupper
753 ; AVX1-FAST-NEXT: retq
755 ; AVX2-FAST-LABEL: extract_extract67_v8i32_add_i32_commute:
756 ; AVX2-FAST: # %bb.0:
757 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
758 ; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
759 ; AVX2-FAST-NEXT: vpextrd $1, %xmm0, %eax
760 ; AVX2-FAST-NEXT: vzeroupper
761 ; AVX2-FAST-NEXT: retq
763 ; AVX512-FAST-LABEL: extract_extract67_v8i32_add_i32_commute:
764 ; AVX512-FAST: # %bb.0:
765 ; AVX512-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
766 ; AVX512-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
767 ; AVX512-FAST-NEXT: vpextrd $1, %xmm0, %eax
768 ; AVX512-FAST-NEXT: vzeroupper
769 ; AVX512-FAST-NEXT: retq
785 ; SSE3-FAST-LABEL: extract_extract01_v16i16_add_i16:
786 ; SSE3-FAST: # %bb.0:
787 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
788 ; SSE3-FAST-NEXT: movd %xmm0, %eax
789 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
790 ; SSE3-FAST-NEXT: retq
801 ; AVX-FAST-LABEL: extract_extract01_v16i16_add_i16:
802 ; AVX-FAST: # %bb.0:
803 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
804 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
805 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
806 ; AVX-FAST-NEXT: vzeroupper
807 ; AVX-FAST-NEXT: retq
823 ; SSE3-FAST-LABEL: extract_extract23_v16i16_add_i16:
824 ; SSE3-FAST: # %bb.0:
825 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
826 ; SSE3-FAST-NEXT: pextrw $1, %xmm0, %eax
827 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
828 ; SSE3-FAST-NEXT: retq
839 ; AVX-FAST-LABEL: extract_extract23_v16i16_add_i16:
840 ; AVX-FAST: # %bb.0:
841 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
842 ; AVX-FAST-NEXT: vpextrw $1, %xmm0, %eax
843 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
844 ; AVX-FAST-NEXT: vzeroupper
845 ; AVX-FAST-NEXT: retq
861 ; SSE3-FAST-LABEL: extract_extract89_v16i16_add_i16:
862 ; SSE3-FAST: # %bb.0:
863 ; SSE3-FAST-NEXT: phaddw %xmm1, %xmm1
864 ; SSE3-FAST-NEXT: movd %xmm1, %eax
865 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
866 ; SSE3-FAST-NEXT: retq
878 ; AVX1-FAST-LABEL: extract_extract89_v16i16_add_i16:
879 ; AVX1-FAST: # %bb.0:
880 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
881 ; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
882 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax
883 ; AVX1-FAST-NEXT: # kill: def $ax killed $ax killed $eax
884 ; AVX1-FAST-NEXT: vzeroupper
885 ; AVX1-FAST-NEXT: retq
897 ; AVX2-FAST-LABEL: extract_extract89_v16i16_add_i16:
898 ; AVX2-FAST: # %bb.0:
899 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
900 ; AVX2-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
901 ; AVX2-FAST-NEXT: vmovd %xmm0, %eax
902 ; AVX2-FAST-NEXT: # kill: def $ax killed $ax killed $eax
903 ; AVX2-FAST-NEXT: vzeroupper
904 ; AVX2-FAST-NEXT: retq
916 ; AVX512-FAST-LABEL: extract_extract89_v16i16_add_i16:
917 ; AVX512-FAST: # %bb.0:
918 ; AVX512-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
919 ; AVX512-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
920 ; AVX512-FAST-NEXT: vmovd %xmm0, %eax
921 ; AVX512-FAST-NEXT: # kill: def $ax killed $ax killed $eax
922 ; AVX512-FAST-NEXT: vzeroupper
923 ; AVX512-FAST-NEXT: retq
939 ; SSE3-FAST-LABEL: extract_extract01_v16i16_add_i16_commute:
940 ; SSE3-FAST: # %bb.0:
941 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
942 ; SSE3-FAST-NEXT: movd %xmm0, %eax
943 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
944 ; SSE3-FAST-NEXT: retq
955 ; AVX-FAST-LABEL: extract_extract01_v16i16_add_i16_commute:
956 ; AVX-FAST: # %bb.0:
957 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
958 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
959 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
960 ; AVX-FAST-NEXT: vzeroupper
961 ; AVX-FAST-NEXT: retq
977 ; SSE3-FAST-LABEL: extract_extract45_v16i16_add_i16_commute:
978 ; SSE3-FAST: # %bb.0:
979 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
980 ; SSE3-FAST-NEXT: pextrw $2, %xmm0, %eax
981 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
982 ; SSE3-FAST-NEXT: retq
993 ; AVX-FAST-LABEL: extract_extract45_v16i16_add_i16_commute:
994 ; AVX-FAST: # %bb.0:
995 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
996 ; AVX-FAST-NEXT: vpextrw $2, %xmm0, %eax
997 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
998 ; AVX-FAST-NEXT: vzeroupper
999 ; AVX-FAST-NEXT: retq
1015 ; SSE3-FAST-LABEL: extract_extract89_v16i16_add_i16_commute:
1016 ; SSE3-FAST: # %bb.0:
1017 ; SSE3-FAST-NEXT: phaddw %xmm1, %xmm1
1018 ; SSE3-FAST-NEXT: movd %xmm1, %eax
1019 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1020 ; SSE3-FAST-NEXT: retq
1032 ; AVX1-FAST-LABEL: extract_extract89_v16i16_add_i16_commute:
1033 ; AVX1-FAST: # %bb.0:
1034 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
1035 ; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1036 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax
1037 ; AVX1-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1038 ; AVX1-FAST-NEXT: vzeroupper
1039 ; AVX1-FAST-NEXT: retq
1051 ; AVX2-FAST-LABEL: extract_extract89_v16i16_add_i16_commute:
1052 ; AVX2-FAST: # %bb.0:
1053 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
1054 ; AVX2-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1055 ; AVX2-FAST-NEXT: vmovd %xmm0, %eax
1056 ; AVX2-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1057 ; AVX2-FAST-NEXT: vzeroupper
1058 ; AVX2-FAST-NEXT: retq
1070 ; AVX512-FAST-LABEL: extract_extract89_v16i16_add_i16_commute:
1071 ; AVX512-FAST: # %bb.0:
1072 ; AVX512-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
1073 ; AVX512-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1074 ; AVX512-FAST-NEXT: vmovd %xmm0, %eax
1075 ; AVX512-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1076 ; AVX512-FAST-NEXT: vzeroupper
1077 ; AVX512-FAST-NEXT: retq
1093 ; SSE3-FAST-LABEL: extract_extract01_v8i32_sub_i32:
1094 ; SSE3-FAST: # %bb.0:
1095 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
1096 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1097 ; SSE3-FAST-NEXT: retq
1107 ; AVX-FAST-LABEL: extract_extract01_v8i32_sub_i32:
1108 ; AVX-FAST: # %bb.0:
1109 ; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1110 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1111 ; AVX-FAST-NEXT: vzeroupper
1112 ; AVX-FAST-NEXT: retq
1129 ; SSE3-FAST-LABEL: extract_extract23_v8i32_sub_i32:
1130 ; SSE3-FAST: # %bb.0:
1131 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
1132 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1133 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1134 ; SSE3-FAST-NEXT: retq
1144 ; AVX-FAST-LABEL: extract_extract23_v8i32_sub_i32:
1145 ; AVX-FAST: # %bb.0:
1146 ; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1147 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, %eax
1148 ; AVX-FAST-NEXT: vzeroupper
1149 ; AVX-FAST-NEXT: retq
1166 ; SSE3-FAST-LABEL: extract_extract67_v8i32_sub_i32:
1167 ; SSE3-FAST: # %bb.0:
1168 ; SSE3-FAST-NEXT: phsubd %xmm1, %xmm1
1169 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1170 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1171 ; SSE3-FAST-NEXT: retq
1182 ; AVX1-FAST-LABEL: extract_extract67_v8i32_sub_i32:
1183 ; AVX1-FAST: # %bb.0:
1184 ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
1185 ; AVX1-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1186 ; AVX1-FAST-NEXT: vpextrd $1, %xmm0, %eax
1187 ; AVX1-FAST-NEXT: vzeroupper
1188 ; AVX1-FAST-NEXT: retq
1190 ; AVX2-FAST-LABEL: extract_extract67_v8i32_sub_i32:
1191 ; AVX2-FAST: # %bb.0:
1192 ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
1193 ; AVX2-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1194 ; AVX2-FAST-NEXT: vpextrd $1, %xmm0, %eax
1195 ; AVX2-FAST-NEXT: vzeroupper
1196 ; AVX2-FAST-NEXT: retq
1198 ; AVX512-FAST-LABEL: extract_extract67_v8i32_sub_i32:
1199 ; AVX512-FAST: # %bb.0:
1200 ; AVX512-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
1201 ; AVX512-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1202 ; AVX512-FAST-NEXT: vpextrd $1, %xmm0, %eax
1203 ; AVX512-FAST-NEXT: vzeroupper
1204 ; AVX512-FAST-NEXT: retq
1244 ; SSE3-FAST-LABEL: extract_extract01_v16i16_sub_i16:
1245 ; SSE3-FAST: # %bb.0:
1246 ; SSE3-FAST-NEXT: phsubw %xmm0, %xmm0
1247 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1248 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1249 ; SSE3-FAST-NEXT: retq
1260 ; AVX-FAST-LABEL: extract_extract01_v16i16_sub_i16:
1261 ; AVX-FAST: # %bb.0:
1262 ; AVX-FAST-NEXT: vphsubw %xmm0, %xmm0, %xmm0
1263 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1264 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1265 ; AVX-FAST-NEXT: vzeroupper
1266 ; AVX-FAST-NEXT: retq
1309 ; SSE3-FAST-LABEL: extract_extract01_v16i32_add_i32:
1310 ; SSE3-FAST: # %bb.0:
1311 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
1312 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1313 ; SSE3-FAST-NEXT: retq
1323 ; AVX-FAST-LABEL: extract_extract01_v16i32_add_i32:
1324 ; AVX-FAST: # %bb.0:
1325 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1326 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1327 ; AVX-FAST-NEXT: vzeroupper
1328 ; AVX-FAST-NEXT: retq
1344 ; SSE3-FAST-LABEL: extract_extract01_v16i32_add_i32_commute:
1345 ; SSE3-FAST: # %bb.0:
1346 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
1347 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1348 ; SSE3-FAST-NEXT: retq
1358 ; AVX-FAST-LABEL: extract_extract01_v16i32_add_i32_commute:
1359 ; AVX-FAST: # %bb.0:
1360 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1361 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1362 ; AVX-FAST-NEXT: vzeroupper
1363 ; AVX-FAST-NEXT: retq
1379 ; SSE3-FAST-LABEL: extract_extract01_v32i16_add_i16:
1380 ; SSE3-FAST: # %bb.0:
1381 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
1382 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1383 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1384 ; SSE3-FAST-NEXT: retq
1395 ; AVX-FAST-LABEL: extract_extract01_v32i16_add_i16:
1396 ; AVX-FAST: # %bb.0:
1397 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1398 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1399 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1400 ; AVX-FAST-NEXT: vzeroupper
1401 ; AVX-FAST-NEXT: retq
1417 ; SSE3-FAST-LABEL: extract_extract01_v32i16_add_i16_commute:
1418 ; SSE3-FAST: # %bb.0:
1419 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
1420 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1421 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1422 ; SSE3-FAST-NEXT: retq
1433 ; AVX-FAST-LABEL: extract_extract01_v32i16_add_i16_commute:
1434 ; AVX-FAST: # %bb.0:
1435 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1436 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1437 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1438 ; AVX-FAST-NEXT: vzeroupper
1439 ; AVX-FAST-NEXT: retq
1455 ; SSE3-FAST-LABEL: extract_extract01_v16i32_sub_i32:
1456 ; SSE3-FAST: # %bb.0:
1457 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
1458 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1459 ; SSE3-FAST-NEXT: retq
1469 ; AVX-FAST-LABEL: extract_extract01_v16i32_sub_i32:
1470 ; AVX-FAST: # %bb.0:
1471 ; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1472 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1473 ; AVX-FAST-NEXT: vzeroupper
1474 ; AVX-FAST-NEXT: retq
1512 ; SSE3-FAST-LABEL: extract_extract01_v32i16_sub_i16:
1513 ; SSE3-FAST: # %bb.0:
1514 ; SSE3-FAST-NEXT: phsubw %xmm0, %xmm0
1515 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1516 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1517 ; SSE3-FAST-NEXT: retq
1528 ; AVX-FAST-LABEL: extract_extract01_v32i16_sub_i16:
1529 ; AVX-FAST: # %bb.0:
1530 ; AVX-FAST-NEXT: vphsubw %xmm0, %xmm0, %xmm0
1531 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1532 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1533 ; AVX-FAST-NEXT: vzeroupper
1534 ; AVX-FAST-NEXT: retq
1576 ; SSE3-FAST-LABEL: extract_extract01_v4i32_add_i32_uses1:
1577 ; SSE3-FAST: # %bb.0:
1578 ; SSE3-FAST-NEXT: movd %xmm0, (%rdi)
1579 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
1580 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1581 ; SSE3-FAST-NEXT: retq
1591 ; AVX-FAST-LABEL: extract_extract01_v4i32_add_i32_uses1:
1592 ; AVX-FAST: # %bb.0:
1593 ; AVX-FAST-NEXT: vmovd %xmm0, (%rdi)
1594 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1595 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1596 ; AVX-FAST-NEXT: retq
1614 ; SSE3-FAST-LABEL: extract_extract01_v4i32_add_i32_uses2:
1615 ; SSE3-FAST: # %bb.0:
1616 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1617 ; SSE3-FAST-NEXT: movd %xmm1, (%rdi)
1618 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
1619 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1620 ; SSE3-FAST-NEXT: retq
1630 ; AVX-FAST-LABEL: extract_extract01_v4i32_add_i32_uses2:
1631 ; AVX-FAST: # %bb.0:
1632 ; AVX-FAST-NEXT: vpextrd $1, %xmm0, (%rdi)
1633 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1634 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1635 ; AVX-FAST-NEXT: retq
1682 ; SSE3-FAST-LABEL: partial_reduction_add_v8i32:
1683 ; SSE3-FAST: # %bb.0:
1684 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1685 ; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
1686 ; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
1687 ; SSE3-FAST-NEXT: movd %xmm1, %eax
1688 ; SSE3-FAST-NEXT: retq
1700 ; AVX-FAST-LABEL: partial_reduction_add_v8i32:
1701 ; AVX-FAST: # %bb.0:
1702 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1703 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1704 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1705 ; AVX-FAST-NEXT: vzeroupper
1706 ; AVX-FAST-NEXT: retq
1725 ; SSE3-FAST-LABEL: partial_reduction_add_v16i32:
1726 ; SSE3-FAST: # %bb.0:
1727 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1728 ; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
1729 ; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
1730 ; SSE3-FAST-NEXT: movd %xmm1, %eax
1731 ; SSE3-FAST-NEXT: retq
1743 ; AVX-FAST-LABEL: partial_reduction_add_v16i32:
1744 ; AVX-FAST: # %bb.0:
1745 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1746 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1747 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1748 ; AVX-FAST-NEXT: vzeroupper
1749 ; AVX-FAST-NEXT: retq
1768 ; SSE3-FAST-LABEL: partial_reduction_sub_v8i32:
1769 ; SSE3-FAST: # %bb.0:
1770 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1771 ; SSE3-FAST-NEXT: psubd %xmm1, %xmm0
1772 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
1773 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1774 ; SSE3-FAST-NEXT: retq
1786 ; AVX-FAST-LABEL: partial_reduction_sub_v8i32:
1787 ; AVX-FAST: # %bb.0:
1788 ; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1789 ; AVX-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1790 ; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1791 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1792 ; AVX-FAST-NEXT: vzeroupper
1793 ; AVX-FAST-NEXT: retq
1812 ; SSE3-FAST-LABEL: partial_reduction_sub_v16i32:
1813 ; SSE3-FAST: # %bb.0:
1814 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1815 ; SSE3-FAST-NEXT: psubd %xmm1, %xmm0
1816 ; SSE3-FAST-NEXT: phsubd %xmm0, %xmm0
1817 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1818 ; SSE3-FAST-NEXT: retq
1830 ; AVX1-FAST-LABEL: partial_reduction_sub_v16i32:
1831 ; AVX1-FAST: # %bb.0:
1832 ; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1833 ; AVX1-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1834 ; AVX1-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
1835 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax
1836 ; AVX1-FAST-NEXT: vzeroupper
1837 ; AVX1-FAST-NEXT: retq
1839 ; AVX2-FAST-LABEL: partial_reduction_sub_v16i32:
1840 ; AVX2-FAST: # %bb.0:
1841 ; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1842 ; AVX2-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1843 ; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1844 ; AVX2-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1845 ; AVX2-FAST-NEXT: vmovd %xmm0, %eax
1846 ; AVX2-FAST-NEXT: vzeroupper
1847 ; AVX2-FAST-NEXT: retq
1849 ; AVX512-FAST-LABEL: partial_reduction_sub_v16i32:
1850 ; AVX512-FAST: # %bb.0:
1851 ; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1852 ; AVX512-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1853 ; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1854 ; AVX512-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1855 ; AVX512-FAST-NEXT: vmovd %xmm0, %eax
1856 ; AVX512-FAST-NEXT: vzeroupper
1857 ; AVX512-FAST-NEXT: retq
1882 ; SSE3-FAST-LABEL: hadd16_8:
1883 ; SSE3-FAST: # %bb.0:
1884 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
1885 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
1886 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0
1887 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1888 ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1889 ; SSE3-FAST-NEXT: retq
1903 ; AVX-FAST-LABEL: hadd16_8:
1904 ; AVX-FAST: # %bb.0:
1905 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1906 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1907 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0
1908 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1909 ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax
1910 ; AVX-FAST-NEXT: retq
1931 ; SSE3-FAST-LABEL: hadd32_4:
1932 ; SSE3-FAST: # %bb.0:
1933 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
1934 ; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
1935 ; SSE3-FAST-NEXT: movd %xmm0, %eax
1936 ; SSE3-FAST-NEXT: retq
1947 ; AVX-FAST-LABEL: hadd32_4:
1948 ; AVX-FAST: # %bb.0:
1949 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1950 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1951 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1952 ; AVX-FAST-NEXT: retq
1971 ; SSE3-FAST-LABEL: hadd32_8:
1972 ; SSE3-FAST: # %bb.0:
1973 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1974 ; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
1975 ; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
1976 ; SSE3-FAST-NEXT: movd %xmm1, %eax
1977 ; SSE3-FAST-NEXT: retq
1989 ; AVX-FAST-LABEL: hadd32_8:
1990 ; AVX-FAST: # %bb.0:
1991 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1992 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
1993 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
1994 ; AVX-FAST-NEXT: vzeroupper
1995 ; AVX-FAST-NEXT: retq
2014 ; SSE3-FAST-LABEL: hadd32_16:
2015 ; SSE3-FAST: # %bb.0:
2016 ; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2017 ; SSE3-FAST-NEXT: paddd %xmm0, %xmm1
2018 ; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1
2019 ; SSE3-FAST-NEXT: movd %xmm1, %eax
2020 ; SSE3-FAST-NEXT: retq
2032 ; AVX-FAST-LABEL: hadd32_16:
2033 ; AVX-FAST: # %bb.0:
2034 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
2035 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
2036 ; AVX-FAST-NEXT: vmovd %xmm0, %eax
2037 ; AVX-FAST-NEXT: vzeroupper
2038 ; AVX-FAST-NEXT: retq