Lines Matching refs:AVX512

6 … < %s -mtriple=x86_64-- -mcpu=x86-64 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512
7 …iple=x86_64-- -mcpu=x86-64 -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
36 ; AVX512-LABEL: test_v2f32:
37 ; AVX512: # %bb.0:
38 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
39 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
40 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
41 ; AVX512-NEXT: retq
77 ; AVX512-LABEL: test_v4f32:
78 ; AVX512: # %bb.0:
79 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
80 ; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
81 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
82 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
83 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
84 ; AVX512-NEXT: retq
125 ; AVX512-LABEL: test_v8f32:
126 ; AVX512: # %bb.0:
127 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
128 ; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
129 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
130 ; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
131 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
132 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
133 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
134 ; AVX512-NEXT: vzeroupper
135 ; AVX512-NEXT: retq
181 ; AVX512-LABEL: test_v16f32:
182 ; AVX512: # %bb.0:
183 ; AVX512-NEXT: vextractf64x4 $1, %zmm1, %ymm2
184 ; AVX512-NEXT: vmulps %zmm2, %zmm1, %zmm1
185 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
186 ; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
187 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
188 ; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1
189 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
190 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
191 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
192 ; AVX512-NEXT: vzeroupper
193 ; AVX512-NEXT: retq
223 ; AVX512-LABEL: test_v2f32_zero:
224 ; AVX512: # %bb.0:
225 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
226 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
227 ; AVX512-NEXT: retq
261 ; AVX512-LABEL: test_v4f32_zero:
262 ; AVX512: # %bb.0:
263 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
264 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
265 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
266 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
267 ; AVX512-NEXT: retq
306 ; AVX512-LABEL: test_v8f32_zero:
307 ; AVX512: # %bb.0:
308 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
309 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
310 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
311 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
312 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
313 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
314 ; AVX512-NEXT: vzeroupper
315 ; AVX512-NEXT: retq
359 ; AVX512-LABEL: test_v16f32_zero:
360 ; AVX512: # %bb.0:
361 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
362 ; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
363 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
364 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
365 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
366 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
367 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
368 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
369 ; AVX512-NEXT: vzeroupper
370 ; AVX512-NEXT: retq
400 ; AVX512-LABEL: test_v2f32_undef:
401 ; AVX512: # %bb.0:
402 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
403 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
404 ; AVX512-NEXT: retq
438 ; AVX512-LABEL: test_v4f32_undef:
439 ; AVX512: # %bb.0:
440 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
441 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
442 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
443 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
444 ; AVX512-NEXT: retq
483 ; AVX512-LABEL: test_v8f32_undef:
484 ; AVX512: # %bb.0:
485 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
486 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
487 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
488 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
489 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
490 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
491 ; AVX512-NEXT: vzeroupper
492 ; AVX512-NEXT: retq
536 ; AVX512-LABEL: test_v16f32_undef:
537 ; AVX512: # %bb.0:
538 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
539 ; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
540 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
541 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
542 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
543 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
544 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
545 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
546 ; AVX512-NEXT: vzeroupper
547 ; AVX512-NEXT: retq
572 ; AVX512-LABEL: test_v2f64:
573 ; AVX512: # %bb.0:
574 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
575 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
576 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
577 ; AVX512-NEXT: retq
602 ; AVX512-LABEL: test_v4f64:
603 ; AVX512: # %bb.0:
604 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
605 ; AVX512-NEXT: vmulpd %xmm2, %xmm1, %xmm1
606 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
607 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
608 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
609 ; AVX512-NEXT: vzeroupper
610 ; AVX512-NEXT: retq
638 ; AVX512-LABEL: test_v8f64:
639 ; AVX512: # %bb.0:
640 ; AVX512-NEXT: vextractf64x4 $1, %zmm1, %ymm2
641 ; AVX512-NEXT: vmulpd %zmm2, %zmm1, %zmm1
642 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
643 ; AVX512-NEXT: vmulpd %xmm2, %xmm1, %xmm1
644 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
645 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
646 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
647 ; AVX512-NEXT: vzeroupper
648 ; AVX512-NEXT: retq
682 ; AVX512-LABEL: test_v16f64:
683 ; AVX512: # %bb.0:
684 ; AVX512-NEXT: vmulpd %zmm2, %zmm1, %zmm1
685 ; AVX512-NEXT: vextractf64x4 $1, %zmm1, %ymm2
686 ; AVX512-NEXT: vmulpd %zmm2, %zmm1, %zmm1
687 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
688 ; AVX512-NEXT: vmulpd %xmm2, %xmm1, %xmm1
689 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
690 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
691 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
692 ; AVX512-NEXT: vzeroupper
693 ; AVX512-NEXT: retq
717 ; AVX512-LABEL: test_v2f64_zero:
718 ; AVX512: # %bb.0:
719 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
720 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
721 ; AVX512-NEXT: retq
745 ; AVX512-LABEL: test_v4f64_zero:
746 ; AVX512: # %bb.0:
747 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
748 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
749 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
750 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
751 ; AVX512-NEXT: vzeroupper
752 ; AVX512-NEXT: retq
779 ; AVX512-LABEL: test_v8f64_zero:
780 ; AVX512: # %bb.0:
781 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
782 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
783 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
784 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
785 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
786 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
787 ; AVX512-NEXT: vzeroupper
788 ; AVX512-NEXT: retq
820 ; AVX512-LABEL: test_v16f64_zero:
821 ; AVX512: # %bb.0:
822 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
823 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
824 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
825 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
826 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
827 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
828 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
829 ; AVX512-NEXT: vzeroupper
830 ; AVX512-NEXT: retq
854 ; AVX512-LABEL: test_v2f64_undef:
855 ; AVX512: # %bb.0:
856 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
857 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
858 ; AVX512-NEXT: retq
882 ; AVX512-LABEL: test_v4f64_undef:
883 ; AVX512: # %bb.0:
884 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
885 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
886 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
887 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
888 ; AVX512-NEXT: vzeroupper
889 ; AVX512-NEXT: retq
916 ; AVX512-LABEL: test_v8f64_undef:
917 ; AVX512: # %bb.0:
918 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
919 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
920 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
921 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
922 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
923 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
924 ; AVX512-NEXT: vzeroupper
925 ; AVX512-NEXT: retq
957 ; AVX512-LABEL: test_v16f64_undef:
958 ; AVX512: # %bb.0:
959 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
960 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
961 ; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
962 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
963 ; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0
964 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
965 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
966 ; AVX512-NEXT: vzeroupper
967 ; AVX512-NEXT: retq