Lines Matching refs:AVX2

3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
14 ; AVX2-LABEL: shuffle_v8f32_00000000:
15 ; AVX2: # BB#0:
16 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
17 ; AVX2-NEXT: retq
30 ; AVX2-LABEL: shuffle_v8f32_00000010:
31 ; AVX2: # BB#0:
32 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
33 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
34 ; AVX2-NEXT: retq
47 ; AVX2-LABEL: shuffle_v8f32_00000200:
48 ; AVX2: # BB#0:
49 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
50 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
51 ; AVX2-NEXT: retq
64 ; AVX2-LABEL: shuffle_v8f32_00003000:
65 ; AVX2: # BB#0:
66 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
67 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
68 ; AVX2-NEXT: retq
82 ; AVX2-LABEL: shuffle_v8f32_00040000:
83 ; AVX2: # BB#0:
84 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
85 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
86 ; AVX2-NEXT: retq
99 ; AVX2-LABEL: shuffle_v8f32_00500000:
100 ; AVX2: # BB#0:
101 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
102 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
103 ; AVX2-NEXT: retq
116 ; AVX2-LABEL: shuffle_v8f32_06000000:
117 ; AVX2: # BB#0:
118 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
119 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
120 ; AVX2-NEXT: retq
133 ; AVX2-LABEL: shuffle_v8f32_70000000:
134 ; AVX2: # BB#0:
135 ; AVX2-NEXT: movl $7, %eax
136 ; AVX2-NEXT: vmovd %eax, %xmm1
137 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
138 ; AVX2-NEXT: retq
160 ; AVX2-LABEL: shuffle_v8f32_00112233:
161 ; AVX2: # BB#0:
162 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
163 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
164 ; AVX2-NEXT: retq
177 ; AVX2-LABEL: shuffle_v8f32_00001111:
178 ; AVX2: # BB#0:
179 ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
180 ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
181 ; AVX2-NEXT: retq
203 ; AVX2-LABEL: shuffle_v8f32_08080808:
204 ; AVX2: # BB#0:
205 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
206 ; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
207 ; AVX2-NEXT: retq
275 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
276 ; AVX2: # BB#0:
277 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
278 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
279 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
280 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
281 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ym…
282 ; AVX2-NEXT: retq
297 ; AVX2-LABEL: shuffle_v8f32_08991abb:
298 ; AVX2: # BB#0:
299 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
300 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
301 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
302 ; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
303 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
304 ; AVX2-NEXT: retq
318 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
319 ; AVX2: # BB#0:
320 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
321 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
322 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ym…
323 ; AVX2-NEXT: retq
336 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
337 ; AVX2: # BB#0:
338 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
339 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
340 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
341 ; AVX2-NEXT: retq
645 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
646 ; AVX2: # BB#0:
647 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
648 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
649 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
650 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,1]
651 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
652 ; AVX2-NEXT: retq
669 ; AVX2-LABEL: shuffle_v8f32_f511235a:
670 ; AVX2: # BB#0:
671 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
672 ; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
673 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
674 ; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
675 ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
676 ; AVX2-NEXT: retq
688 ; AVX2-LABEL: shuffle_v8f32_32103210:
689 ; AVX2: # BB#0:
690 ; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
691 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
692 ; AVX2-NEXT: retq
768 ; AVX2-LABEL: PR21138:
769 ; AVX2: # BB#0:
770 ; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[1,3,1,3,5,7,5,7]
771 ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
772 ; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,3,5,7,5,7]
773 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
774 ; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
775 ; AVX2-NEXT: retq
835 ; AVX2-LABEL: shuffle_v8f32_44444444:
836 ; AVX2: # BB#0:
837 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
838 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
839 ; AVX2-NEXT: retq
899 ; AVX2-LABEL: shuffle_v8i32_00000000:
900 ; AVX2: # BB#0:
901 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
902 ; AVX2-NEXT: retq
915 ; AVX2-LABEL: shuffle_v8i32_00000010:
916 ; AVX2: # BB#0:
917 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
918 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
919 ; AVX2-NEXT: retq
932 ; AVX2-LABEL: shuffle_v8i32_00000200:
933 ; AVX2: # BB#0:
934 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
935 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
936 ; AVX2-NEXT: retq
949 ; AVX2-LABEL: shuffle_v8i32_00003000:
950 ; AVX2: # BB#0:
951 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
952 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
953 ; AVX2-NEXT: retq
967 ; AVX2-LABEL: shuffle_v8i32_00040000:
968 ; AVX2: # BB#0:
969 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
970 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
971 ; AVX2-NEXT: retq
984 ; AVX2-LABEL: shuffle_v8i32_00500000:
985 ; AVX2: # BB#0:
986 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
987 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
988 ; AVX2-NEXT: retq
1001 ; AVX2-LABEL: shuffle_v8i32_06000000:
1002 ; AVX2: # BB#0:
1003 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
1004 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1005 ; AVX2-NEXT: retq
1018 ; AVX2-LABEL: shuffle_v8i32_70000000:
1019 ; AVX2: # BB#0:
1020 ; AVX2-NEXT: movl $7, %eax
1021 ; AVX2-NEXT: vmovd %eax, %xmm1
1022 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1023 ; AVX2-NEXT: retq
1034 ; AVX2-LABEL: shuffle_v8i32_01014545:
1035 ; AVX2: # BB#0:
1036 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1037 ; AVX2-NEXT: retq
1050 ; AVX2-LABEL: shuffle_v8i32_00112233:
1051 ; AVX2: # BB#0:
1052 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
1053 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1054 ; AVX2-NEXT: retq
1067 ; AVX2-LABEL: shuffle_v8i32_00001111:
1068 ; AVX2: # BB#0:
1069 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
1070 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1071 ; AVX2-NEXT: retq
1082 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
1083 ; AVX2: # BB#0:
1084 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ym…
1085 ; AVX2-NEXT: retq
1098 ; AVX2-LABEL: shuffle_v8i32_08080808:
1099 ; AVX2: # BB#0:
1100 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1101 ; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
1102 ; AVX2-NEXT: retq
1114 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
1115 ; AVX2: # BB#0:
1116 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
1117 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1118 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ym…
1119 ; AVX2-NEXT: retq
1130 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
1131 ; AVX2: # BB#0:
1132 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
1133 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1134 ; AVX2-NEXT: retq
1145 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
1146 ; AVX2: # BB#0:
1147 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1148 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1149 ; AVX2-NEXT: retq
1160 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
1161 ; AVX2: # BB#0:
1162 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
1163 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
1164 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
1165 ; AVX2-NEXT: retq
1176 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
1177 ; AVX2: # BB#0:
1178 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],…
1179 ; AVX2-NEXT: retq
1190 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
1191 ; AVX2: # BB#0:
1192 ; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],…
1193 ; AVX2-NEXT: retq
1206 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
1207 ; AVX2: # BB#0:
1208 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
1209 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1210 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1211 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ym…
1212 ; AVX2-NEXT: retq
1227 ; AVX2-LABEL: shuffle_v8i32_08991abb:
1228 ; AVX2: # BB#0:
1229 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1230 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1231 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
1232 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
1233 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1234 ; AVX2-NEXT: retq
1248 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
1249 ; AVX2: # BB#0:
1250 ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1251 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ym…
1252 ; AVX2-NEXT: retq
1265 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
1266 ; AVX2: # BB#0:
1267 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
1268 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
1269 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
1270 ; AVX2-NEXT: retq
1281 ; AVX2-LABEL: shuffle_v8i32_00014445:
1282 ; AVX2: # BB#0:
1283 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
1284 ; AVX2-NEXT: retq
1295 ; AVX2-LABEL: shuffle_v8i32_00204464:
1296 ; AVX2: # BB#0:
1297 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
1298 ; AVX2-NEXT: retq
1309 ; AVX2-LABEL: shuffle_v8i32_03004744:
1310 ; AVX2: # BB#0:
1311 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
1312 ; AVX2-NEXT: retq
1323 ; AVX2-LABEL: shuffle_v8i32_10005444:
1324 ; AVX2: # BB#0:
1325 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
1326 ; AVX2-NEXT: retq
1337 ; AVX2-LABEL: shuffle_v8i32_22006644:
1338 ; AVX2: # BB#0:
1339 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
1340 ; AVX2-NEXT: retq
1351 ; AVX2-LABEL: shuffle_v8i32_33307774:
1352 ; AVX2: # BB#0:
1353 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
1354 ; AVX2-NEXT: retq
1365 ; AVX2-LABEL: shuffle_v8i32_32107654:
1366 ; AVX2: # BB#0:
1367 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1368 ; AVX2-NEXT: retq
1379 ; AVX2-LABEL: shuffle_v8i32_00234467:
1380 ; AVX2: # BB#0:
1381 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
1382 ; AVX2-NEXT: retq
1393 ; AVX2-LABEL: shuffle_v8i32_00224466:
1394 ; AVX2: # BB#0:
1395 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
1396 ; AVX2-NEXT: retq
1407 ; AVX2-LABEL: shuffle_v8i32_10325476:
1408 ; AVX2: # BB#0:
1409 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
1410 ; AVX2-NEXT: retq
1421 ; AVX2-LABEL: shuffle_v8i32_11335577:
1422 ; AVX2: # BB#0:
1423 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
1424 ; AVX2-NEXT: retq
1435 ; AVX2-LABEL: shuffle_v8i32_10235467:
1436 ; AVX2: # BB#0:
1437 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
1438 ; AVX2-NEXT: retq
1449 ; AVX2-LABEL: shuffle_v8i32_10225466:
1450 ; AVX2: # BB#0:
1451 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
1452 ; AVX2-NEXT: retq
1463 ; AVX2-LABEL: shuffle_v8i32_00015444:
1464 ; AVX2: # BB#0:
1465 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
1466 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1467 ; AVX2-NEXT: retq
1478 ; AVX2-LABEL: shuffle_v8i32_00204644:
1479 ; AVX2: # BB#0:
1480 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
1481 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1482 ; AVX2-NEXT: retq
1493 ; AVX2-LABEL: shuffle_v8i32_03004474:
1494 ; AVX2: # BB#0:
1495 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
1496 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1497 ; AVX2-NEXT: retq
1508 ; AVX2-LABEL: shuffle_v8i32_10004444:
1509 ; AVX2: # BB#0:
1510 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
1511 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1512 ; AVX2-NEXT: retq
1523 ; AVX2-LABEL: shuffle_v8i32_22006446:
1524 ; AVX2: # BB#0:
1525 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
1526 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1527 ; AVX2-NEXT: retq
1538 ; AVX2-LABEL: shuffle_v8i32_33307474:
1539 ; AVX2: # BB#0:
1540 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
1541 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1542 ; AVX2-NEXT: retq
1553 ; AVX2-LABEL: shuffle_v8i32_32104567:
1554 ; AVX2: # BB#0:
1555 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
1556 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1557 ; AVX2-NEXT: retq
1568 ; AVX2-LABEL: shuffle_v8i32_00236744:
1569 ; AVX2: # BB#0:
1570 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
1571 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1572 ; AVX2-NEXT: retq
1583 ; AVX2-LABEL: shuffle_v8i32_00226644:
1584 ; AVX2: # BB#0:
1585 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
1586 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1587 ; AVX2-NEXT: retq
1598 ; AVX2-LABEL: shuffle_v8i32_10324567:
1599 ; AVX2: # BB#0:
1600 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
1601 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1602 ; AVX2-NEXT: retq
1613 ; AVX2-LABEL: shuffle_v8i32_11334567:
1614 ; AVX2: # BB#0:
1615 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
1616 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1617 ; AVX2-NEXT: retq
1628 ; AVX2-LABEL: shuffle_v8i32_01235467:
1629 ; AVX2: # BB#0:
1630 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
1631 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1632 ; AVX2-NEXT: retq
1643 ; AVX2-LABEL: shuffle_v8i32_01235466:
1644 ; AVX2: # BB#0:
1645 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
1646 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1647 ; AVX2-NEXT: retq
1658 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
1659 ; AVX2: # BB#0:
1660 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
1661 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1662 ; AVX2-NEXT: retq
1673 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
1674 ; AVX2: # BB#0:
1675 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
1676 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1677 ; AVX2-NEXT: retq
1688 ; AVX2-LABEL: shuffle_v8i32_103245uu:
1689 ; AVX2: # BB#0:
1690 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
1691 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1692 ; AVX2-NEXT: retq
1703 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
1704 ; AVX2: # BB#0:
1705 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
1706 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1707 ; AVX2-NEXT: retq
1718 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
1719 ; AVX2: # BB#0:
1720 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
1721 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1722 ; AVX2-NEXT: retq
1733 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
1734 ; AVX2: # BB#0:
1735 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
1736 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
1737 ; AVX2-NEXT: retq
1752 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
1753 ; AVX2: # BB#0:
1754 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
1755 ; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
1756 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,0,3]
1757 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
1758 ; AVX2-NEXT: retq
1770 ; AVX2-LABEL: shuffle_v8i32_32103210:
1771 ; AVX2: # BB#0:
1772 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
1773 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1774 ; AVX2-NEXT: retq
1786 ; AVX2-LABEL: shuffle_v8i32_76547654:
1787 ; AVX2: # BB#0:
1788 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1789 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
1790 ; AVX2-NEXT: retq
1802 ; AVX2-LABEL: shuffle_v8i32_76543210:
1803 ; AVX2: # BB#0:
1804 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1805 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
1806 ; AVX2-NEXT: retq
1818 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
1819 ; AVX2: # BB#0:
1820 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1821 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1822 ; AVX2-NEXT: retq
1834 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
1835 ; AVX2: # BB#0:
1836 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1837 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1838 ; AVX2-NEXT: retq
1850 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
1851 ; AVX2: # BB#0:
1852 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1853 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1854 ; AVX2-NEXT: retq
1866 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
1867 ; AVX2: # BB#0:
1868 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
1869 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1870 ; AVX2-NEXT: retq
1882 ; AVX2-LABEL: shuffle_v8i32_ba987654:
1883 ; AVX2: # BB#0:
1884 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1885 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1886 ; AVX2-NEXT: retq
1898 ; AVX2-LABEL: shuffle_v8i32_ba983210:
1899 ; AVX2: # BB#0:
1900 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1901 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
1902 ; AVX2-NEXT: retq
1914 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
1915 ; AVX2: # BB#0:
1916 ; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero…
1917 ; AVX2-NEXT: retq
1930 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
1931 ; AVX2: # BB#0:
1932 ; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm…
1933 ; AVX2-NEXT: retq
1944 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
1945 ; AVX2: # BB#0:
1946 ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],…
1947 ; AVX2-NEXT: retq
1959 ; AVX2-LABEL: shuffle_v8i32_uuuu1111:
1960 ; AVX2: # BB#0:
1961 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
1962 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
1963 ; AVX2-NEXT: retq
1993 ; AVX2-LABEL: shuffle_v8i32_44444444:
1994 ; AVX2: # BB#0:
1995 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
1996 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
1997 ; AVX2-NEXT: retq
2009 ; AVX2-LABEL: shuffle_v8i32_5555uuuu:
2010 ; AVX2: # BB#0:
2011 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2012 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
2013 ; AVX2-NEXT: retq
2036 ; AVX2-LABEL: splat_v8f32:
2037 ; AVX2: # BB#0:
2038 ; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
2039 ; AVX2-NEXT: retq
2056 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
2057 ; AVX2: # BB#0:
2058 ; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
2059 ; AVX2-NEXT: retq
2072 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
2073 ; AVX2: # BB#0:
2074 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2075 ; AVX2-NEXT: retq
2087 ; AVX2-LABEL: shuffle_v8i32_B012F456:
2088 ; AVX2: # BB#0:
2089 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,…
2090 ; AVX2-NEXT: retq
2102 ; AVX2-LABEL: shuffle_v8i32_1238567C:
2103 ; AVX2: # BB#0:
2104 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,…
2105 ; AVX2-NEXT: retq
2117 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
2118 ; AVX2: # BB#0:
2119 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,…
2120 ; AVX2-NEXT: retq
2132 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
2133 ; AVX2: # BB#0:
2134 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,…
2135 ; AVX2-NEXT: retq
2146 ; AVX2-LABEL: shuffle_v8i32_30127456:
2147 ; AVX2: # BB#0:
2148 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
2149 ; AVX2-NEXT: retq
2160 ; AVX2-LABEL: shuffle_v8i32_12305674:
2161 ; AVX2: # BB#0:
2162 ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
2163 ; AVX2-NEXT: retq
2227 ; AVX2-LABEL: concat_v8i32_0123CDEF:
2228 ; AVX2: # BB#0:
2229 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
2230 ; AVX2-NEXT: retq