Lines Matching refs:AVX512
7 …< %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512
8 …ple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
36 ; AVX512-LABEL: test_v2f32:
37 ; AVX512: # %bb.0:
38 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
39 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
40 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
41 ; AVX512-NEXT: retq
83 ; AVX512-LABEL: test_v4f32:
84 ; AVX512: # %bb.0:
85 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
86 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
87 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
88 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
89 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
90 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
91 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
92 ; AVX512-NEXT: retq
160 ; AVX512-LABEL: test_v8f32:
161 ; AVX512: # %bb.0:
162 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
163 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
164 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
165 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
166 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
167 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
168 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
169 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1
170 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
171 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
172 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
173 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
174 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
175 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
176 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
177 ; AVX512-NEXT: vzeroupper
178 ; AVX512-NEXT: retq
295 ; AVX512-LABEL: test_v16f32:
296 ; AVX512: # %bb.0:
297 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
298 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
299 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
300 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
301 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
302 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,3,3,3]
303 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
304 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
305 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
306 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
307 ; AVX512-NEXT: vaddss %xmm3, %xmm0, %xmm0
308 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
309 ; AVX512-NEXT: vaddss %xmm3, %xmm0, %xmm0
310 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
311 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
312 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
313 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
314 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
315 ; AVX512-NEXT: vaddss %xmm3, %xmm0, %xmm0
316 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
317 ; AVX512-NEXT: vaddss %xmm3, %xmm0, %xmm0
318 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
319 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
320 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
321 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
322 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
323 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
324 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
325 ; AVX512-NEXT: vaddss %xmm2, %xmm0, %xmm0
326 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
327 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
328 ; AVX512-NEXT: vzeroupper
329 ; AVX512-NEXT: retq
370 ; AVX512-LABEL: test_v2f32_zero:
371 ; AVX512: # %bb.0:
372 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
373 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
374 ; AVX512-NEXT: retq
432 ; AVX512-LABEL: test_v4f32_zero:
433 ; AVX512: # %bb.0:
434 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
435 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
436 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
437 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
438 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
439 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
440 ; AVX512-NEXT: retq
542 ; AVX512-LABEL: test_v8f32_zero:
543 ; AVX512: # %bb.0:
544 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
545 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
546 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
547 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
548 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
549 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
550 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
551 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1
552 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
553 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
554 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
555 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
556 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
557 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
558 ; AVX512-NEXT: vzeroupper
559 ; AVX512-NEXT: retq
740 ; AVX512-LABEL: test_v16f32_zero:
741 ; AVX512: # %bb.0:
742 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
743 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm1
744 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
745 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
746 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
747 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
748 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
749 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
750 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
751 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
752 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
753 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
754 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
755 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
756 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
757 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
758 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
759 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
760 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
761 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
762 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
763 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
764 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
765 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1
766 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
767 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
768 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
769 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
770 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
771 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
772 ; AVX512-NEXT: vzeroupper
773 ; AVX512-NEXT: retq
801 ; AVX512-LABEL: test_v2f32_undef:
802 ; AVX512: # %bb.0:
803 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
804 ; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
805 ; AVX512-NEXT: retq
844 ; AVX512-LABEL: test_v4f32_undef:
845 ; AVX512: # %bb.0:
846 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
847 ; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
848 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
849 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
850 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
851 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
852 ; AVX512-NEXT: retq
917 ; AVX512-LABEL: test_v8f32_undef:
918 ; AVX512: # %bb.0:
919 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
920 ; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
921 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
922 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
923 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
924 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
925 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
926 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1
927 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
928 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
929 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
930 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
931 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
932 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
933 ; AVX512-NEXT: vzeroupper
934 ; AVX512-NEXT: retq
1048 ; AVX512-LABEL: test_v16f32_undef:
1049 ; AVX512: # %bb.0:
1050 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
1051 ; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
1052 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1053 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1054 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,3,3,3]
1055 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1056 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
1057 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1058 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
1059 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
1060 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
1061 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
1062 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
1063 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1064 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
1065 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1066 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
1067 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
1068 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
1069 ; AVX512-NEXT: vaddss %xmm3, %xmm1, %xmm1
1070 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
1071 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1072 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1073 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm1
1074 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
1075 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1076 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1077 ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1
1078 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1079 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
1080 ; AVX512-NEXT: vzeroupper
1081 ; AVX512-NEXT: retq
1105 ; AVX512-LABEL: test_v2f64:
1106 ; AVX512: # %bb.0:
1107 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1108 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1109 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1110 ; AVX512-NEXT: retq
1138 ; AVX512-LABEL: test_v4f64:
1139 ; AVX512: # %bb.0:
1140 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1141 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1142 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1143 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1
1144 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1145 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1146 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1147 ; AVX512-NEXT: vzeroupper
1148 ; AVX512-NEXT: retq
1189 ; AVX512-LABEL: test_v8f64:
1190 ; AVX512: # %bb.0:
1191 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1192 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1193 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1194 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
1195 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1196 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1197 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1198 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1199 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1200 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1201 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1202 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1203 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1204 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1205 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1206 ; AVX512-NEXT: vzeroupper
1207 ; AVX512-NEXT: retq
1302 ; AVX512-LABEL: test_v16f64:
1303 ; AVX512: # %bb.0:
1304 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1305 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
1306 ; AVX512-NEXT: vaddsd %xmm3, %xmm0, %xmm0
1307 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm3
1308 ; AVX512-NEXT: vaddsd %xmm3, %xmm0, %xmm0
1309 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1310 ; AVX512-NEXT: vaddsd %xmm3, %xmm0, %xmm0
1311 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm3
1312 ; AVX512-NEXT: vaddsd %xmm3, %xmm0, %xmm0
1313 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1314 ; AVX512-NEXT: vaddsd %xmm3, %xmm0, %xmm0
1315 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1316 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1317 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1318 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1319 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1320 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
1321 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1322 ; AVX512-NEXT: vextractf128 $1, %ymm2, %xmm1
1323 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1324 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1325 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1326 ; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm1
1327 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1328 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1329 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1330 ; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm1
1331 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1332 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1333 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1334 ; AVX512-NEXT: vzeroupper
1335 ; AVX512-NEXT: retq
1370 ; AVX512-LABEL: test_v2f64_zero:
1371 ; AVX512: # %bb.0:
1372 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1373 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1374 ; AVX512-NEXT: retq
1423 ; AVX512-LABEL: test_v4f64_zero:
1424 ; AVX512: # %bb.0:
1425 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1426 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
1427 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
1428 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
1429 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1430 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
1431 ; AVX512-NEXT: vzeroupper
1432 ; AVX512-NEXT: retq
1508 ; AVX512-LABEL: test_v8f64_zero:
1509 ; AVX512: # %bb.0:
1510 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1511 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm1
1512 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
1513 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1514 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1515 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1516 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
1517 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1518 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1519 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1520 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1521 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
1522 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1523 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
1524 ; AVX512-NEXT: vzeroupper
1525 ; AVX512-NEXT: retq
1654 ; AVX512-LABEL: test_v16f64_zero:
1655 ; AVX512: # %bb.0:
1656 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1657 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm2
1658 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
1659 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1660 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1661 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1662 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
1663 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1664 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1665 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1666 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1667 ; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm2
1668 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1669 ; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm0
1670 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1671 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1672 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1673 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
1674 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1675 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1676 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1677 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1678 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1679 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1680 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1681 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1682 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1683 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1684 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1685 ; AVX512-NEXT: vzeroupper
1686 ; AVX512-NEXT: retq
1708 ; AVX512-LABEL: test_v2f64_undef:
1709 ; AVX512: # %bb.0:
1710 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1711 ; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
1712 ; AVX512-NEXT: retq
1738 ; AVX512-LABEL: test_v4f64_undef:
1739 ; AVX512: # %bb.0:
1740 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1741 ; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
1742 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
1743 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
1744 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1745 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
1746 ; AVX512-NEXT: vzeroupper
1747 ; AVX512-NEXT: retq
1786 ; AVX512-LABEL: test_v8f64_undef:
1787 ; AVX512: # %bb.0:
1788 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
1789 ; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
1790 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
1791 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1792 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1793 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1794 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
1795 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1796 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1797 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1
1798 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1799 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1
1800 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1801 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
1802 ; AVX512-NEXT: vzeroupper
1803 ; AVX512-NEXT: retq
1868 ; AVX512-LABEL: test_v16f64_undef:
1869 ; AVX512: # %bb.0:
1870 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
1871 ; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm2, %xmm2
1872 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3
1873 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1874 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1875 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1876 ; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm3
1877 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1878 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
1879 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2
1880 ; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
1881 ; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm2
1882 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1883 ; AVX512-NEXT: vaddsd %xmm0, %xmm2, %xmm0
1884 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1885 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
1886 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1887 ; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm2
1888 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1889 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1890 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1891 ; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm2
1892 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1893 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
1894 ; AVX512-NEXT: vaddsd %xmm2, %xmm0, %xmm0
1895 ; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1
1896 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1897 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
1898 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
1899 ; AVX512-NEXT: vzeroupper
1900 ; AVX512-NEXT: retq