Lines Matching refs:AVX512
6 …< %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512
7 …ple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
35 ; AVX512-LABEL: test_v2f32:
36 ; AVX512: # %bb.0:
37 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
38 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
39 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
40 ; AVX512-NEXT: retq
82 ; AVX512-LABEL: test_v4f32:
83 ; AVX512: # %bb.0:
84 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
85 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
86 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
87 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
88 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
89 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
90 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
91 ; AVX512-NEXT: retq
159 ; AVX512-LABEL: test_v8f32:
160 ; AVX512: # %bb.0:
161 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
162 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
163 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
164 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
165 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
166 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
167 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
168 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1
169 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
170 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
171 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
172 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
173 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
174 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
175 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
176 ; AVX512-NEXT: vzeroupper
177 ; AVX512-NEXT: retq
294 ; AVX512-LABEL: test_v16f32:
295 ; AVX512: # %bb.0:
296 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
297 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
298 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
299 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
300 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
301 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
302 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
303 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
304 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
305 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
306 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
307 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
308 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
309 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
310 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
311 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
312 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
313 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
314 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
315 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
316 ; AVX512-NEXT: vmulss %xmm3, %xmm0, %xmm0
317 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
318 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
319 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
320 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
321 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
322 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
323 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
324 ; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
325 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
326 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
327 ; AVX512-NEXT: vzeroupper
328 ; AVX512-NEXT: retq
358 ; AVX512-LABEL: test_v2f32_one:
359 ; AVX512: # %bb.0:
360 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
361 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
362 ; AVX512-NEXT: retq
401 ; AVX512-LABEL: test_v4f32_one:
402 ; AVX512: # %bb.0:
403 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
404 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
405 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
406 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
407 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
408 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
409 ; AVX512-NEXT: retq
474 ; AVX512-LABEL: test_v8f32_one:
475 ; AVX512: # %bb.0:
476 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
477 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
478 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
479 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
480 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
481 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
482 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
483 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
484 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
485 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
486 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
487 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
488 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
489 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
490 ; AVX512-NEXT: vzeroupper
491 ; AVX512-NEXT: retq
605 ; AVX512-LABEL: test_v16f32_one:
606 ; AVX512: # %bb.0:
607 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
608 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
609 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
610 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
611 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
612 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
613 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
614 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
615 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
616 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
617 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
618 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
619 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
620 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
621 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
622 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
623 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
624 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
625 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
626 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
627 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
628 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
629 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
630 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
631 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
632 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
633 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
634 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
635 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
636 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
637 ; AVX512-NEXT: vzeroupper
638 ; AVX512-NEXT: retq
666 ; AVX512-LABEL: test_v2f32_undef:
667 ; AVX512: # %bb.0:
668 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
669 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
670 ; AVX512-NEXT: retq
709 ; AVX512-LABEL: test_v4f32_undef:
710 ; AVX512: # %bb.0:
711 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
712 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
713 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
714 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
715 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
716 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
717 ; AVX512-NEXT: retq
782 ; AVX512-LABEL: test_v8f32_undef:
783 ; AVX512: # %bb.0:
784 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
785 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
786 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
787 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
788 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
789 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
790 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
791 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
792 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
793 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
794 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
795 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
796 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
797 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
798 ; AVX512-NEXT: vzeroupper
799 ; AVX512-NEXT: retq
913 ; AVX512-LABEL: test_v16f32_undef:
914 ; AVX512: # %bb.0:
915 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
916 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1
917 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
918 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
919 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
920 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
921 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
922 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
923 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
924 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
925 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
926 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
927 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
928 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
929 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
930 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
931 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
932 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
933 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
934 ; AVX512-NEXT: vmulss %xmm3, %xmm1, %xmm1
935 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
936 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
937 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
938 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm1
939 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
940 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
941 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
942 ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
943 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
944 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
945 ; AVX512-NEXT: vzeroupper
946 ; AVX512-NEXT: retq
970 ; AVX512-LABEL: test_v2f64:
971 ; AVX512: # %bb.0:
972 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
973 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
974 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
975 ; AVX512-NEXT: retq
1003 ; AVX512-LABEL: test_v4f64:
1004 ; AVX512: # %bb.0:
1005 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1006 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1007 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1008 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1
1009 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1010 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1011 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1012 ; AVX512-NEXT: vzeroupper
1013 ; AVX512-NEXT: retq
1054 ; AVX512-LABEL: test_v8f64:
1055 ; AVX512: # %bb.0:
1056 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1057 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1058 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1059 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
1060 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1061 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1062 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1063 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1064 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1065 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1066 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1067 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1068 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1069 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1070 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1071 ; AVX512-NEXT: vzeroupper
1072 ; AVX512-NEXT: retq
1167 ; AVX512-LABEL: test_v16f64:
1168 ; AVX512: # %bb.0:
1169 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1170 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
1171 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
1172 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm3
1173 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
1174 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1175 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
1176 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm3
1177 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
1178 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1179 ; AVX512-NEXT: vmulsd %xmm3, %xmm0, %xmm0
1180 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1181 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1182 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1183 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1184 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1185 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
1186 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1187 ; AVX512-NEXT: vextractf128 $1, %ymm2, %xmm1
1188 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1189 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1190 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1191 ; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm1
1192 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1193 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1194 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1195 ; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm1
1196 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1197 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1198 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1199 ; AVX512-NEXT: vzeroupper
1200 ; AVX512-NEXT: retq
1224 ; AVX512-LABEL: test_v2f64_one:
1225 ; AVX512: # %bb.0:
1226 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1227 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1228 ; AVX512-NEXT: retq
1256 ; AVX512-LABEL: test_v4f64_one:
1257 ; AVX512: # %bb.0:
1258 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1259 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm1
1260 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
1261 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
1262 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1263 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
1264 ; AVX512-NEXT: vzeroupper
1265 ; AVX512-NEXT: retq
1306 ; AVX512-LABEL: test_v8f64_one:
1307 ; AVX512: # %bb.0:
1308 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1309 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm1
1310 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
1311 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1312 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1313 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1314 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
1315 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1316 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1317 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1318 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1319 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
1320 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1321 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
1322 ; AVX512-NEXT: vzeroupper
1323 ; AVX512-NEXT: retq
1389 ; AVX512-LABEL: test_v16f64_one:
1390 ; AVX512: # %bb.0:
1391 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1392 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm2
1393 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
1394 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1395 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1396 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1397 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
1398 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1399 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1400 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1401 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1402 ; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm2
1403 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1404 ; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm0
1405 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1406 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1407 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1408 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
1409 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1410 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1411 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1412 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1413 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1414 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1415 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1416 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1417 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1418 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1419 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1420 ; AVX512-NEXT: vzeroupper
1421 ; AVX512-NEXT: retq
1443 ; AVX512-LABEL: test_v2f64_undef:
1444 ; AVX512: # %bb.0:
1445 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1446 ; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
1447 ; AVX512-NEXT: retq
1473 ; AVX512-LABEL: test_v4f64_undef:
1474 ; AVX512: # %bb.0:
1475 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1476 ; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
1477 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
1478 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
1479 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1480 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
1481 ; AVX512-NEXT: vzeroupper
1482 ; AVX512-NEXT: retq
1521 ; AVX512-LABEL: test_v8f64_undef:
1522 ; AVX512: # %bb.0:
1523 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1524 ; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1
1525 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
1526 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1527 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1528 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1529 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
1530 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1531 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1532 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1
1533 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1534 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1
1535 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1536 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
1537 ; AVX512-NEXT: vzeroupper
1538 ; AVX512-NEXT: retq
1603 ; AVX512-LABEL: test_v16f64_undef:
1604 ; AVX512: # %bb.0:
1605 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1606 ; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2
1607 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
1608 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1609 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1610 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1611 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
1612 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1613 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1614 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2
1615 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1616 ; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm2
1617 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1618 ; AVX512-NEXT: vmulsd %xmm0, %xmm2, %xmm0
1619 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1620 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1621 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1622 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
1623 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1624 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1625 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1626 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1627 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1628 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1629 ; AVX512-NEXT: vmulsd %xmm2, %xmm0, %xmm0
1630 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1631 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1632 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1633 ; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
1634 ; AVX512-NEXT: vzeroupper
1635 ; AVX512-NEXT: retq