Lines Matching refs:xmm1
26 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
33 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
44 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
51 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
62 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
69 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
80 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
87 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
98 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
105 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
116 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
123 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
134 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
141 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
152 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
153 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
154 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
174 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
175 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
176 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
195 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
196 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
197 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
215 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
216 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
217 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
235 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
236 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
237 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
255 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
256 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
275 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
276 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
277 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
295 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
296 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
297 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
315 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
317 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
319 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
333 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
335 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
337 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
351 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
352 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
371 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
372 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
391 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
392 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
411 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
412 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
546 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xm…
564 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xm…
607 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
609 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
610 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
625 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
645 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xm…
664 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xm…
694 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
701 ; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
712 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
714 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
716 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
740 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
741 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
763 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
766 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
788 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],…
807 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
809 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
811 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
825 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
827 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
829 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
843 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
845 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
847 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
861 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
863 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
865 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
879 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
881 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
883 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
897 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
899 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
901 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
915 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
917 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
919 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
936 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
954 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
972 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
992 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
1009 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
1026 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
1043 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
1060 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
1077 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
1094 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
1111 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1128 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
1129 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
1146 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1164 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
1181 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
1198 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
1199 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
1216 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
1217 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1237 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1238 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1239 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1244 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1258 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1259 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1260 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1279 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1280 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1281 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1282 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1300 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1301 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1302 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1303 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1319 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
1320 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1326 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
1327 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1337 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero…
1354 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
1375 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
1392 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
1409 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1442 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
1444 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
1446 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1460 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1461 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
1463 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1480 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1498 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
1516 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
1534 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1550 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
1551 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1556 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1567 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1568 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1573 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1583 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1584 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1587 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1588 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1593 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1594 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1597 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1598 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1607 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1608 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1610 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
1611 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1616 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1617 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
1619 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
1620 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1632 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],…
1633 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
1634 ; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1641 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1642 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1644 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1645 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1654 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1655 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1657 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1658 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1663 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1664 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1666 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
1667 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1676 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1677 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1681 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1682 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1683 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1688 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1689 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1693 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1694 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
1695 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1704 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1705 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1708 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1709 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1714 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1715 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
1718 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1719 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1728 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1729 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
1732 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1733 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1738 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1739 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
1742 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
1743 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1752 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1753 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1756 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
1757 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1758 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1763 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1764 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1767 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
1768 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1769 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1778 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1779 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1782 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1783 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1788 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1789 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1792 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
1793 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1802 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1803 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1805 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
1806 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1811 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1812 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1814 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
1815 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1824 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1825 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1828 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
1829 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1830 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1835 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1836 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
1839 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
1840 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
1841 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1850 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1852 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1853 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
1854 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1855 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1860 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1862 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1863 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
1864 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
1865 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1874 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1876 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1877 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1884 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1886 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1887 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
1898 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1899 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1902 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
1903 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1908 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1909 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1912 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
1913 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1922 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1923 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1926 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
1927 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1932 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1933 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1936 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
1937 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1946 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1947 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1950 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
1951 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1956 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1957 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
1960 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
1961 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1970 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1971 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1974 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
1975 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1980 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1981 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
1984 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
1985 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1994 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1995 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
1998 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
1999 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2004 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2005 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2008 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
2009 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2018 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2020 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2021 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2028 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2030 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2031 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2042 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2044 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2045 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2052 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2054 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2055 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2066 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2068 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2069 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2076 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2078 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2079 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
2090 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2092 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2093 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
2100 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2102 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2103 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
2114 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2115 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2118 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
2119 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2124 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2125 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2128 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
2129 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2138 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2139 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2142 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
2143 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2148 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2149 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2152 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
2153 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2162 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2163 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2166 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2167 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2172 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2173 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2176 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2177 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2186 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2187 ; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
2190 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
2191 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2196 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2197 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
2200 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
2201 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2210 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2213 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2214 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2215 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
2216 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2221 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2224 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2225 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2226 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
2227 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2236 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2237 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2240 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
2241 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2246 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2247 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2250 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
2251 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2260 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2261 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2264 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2265 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2270 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2271 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2274 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2275 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2284 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2285 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
2288 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2289 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2294 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2295 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
2298 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
2299 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2308 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2310 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2312 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2326 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2327 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
2329 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
2330 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2335 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2336 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
2338 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
2339 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2348 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2350 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2352 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2366 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2368 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2369 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2376 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2378 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2379 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2390 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2392 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2393 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
2394 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2399 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2401 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2402 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
2403 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2412 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2414 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2415 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
2422 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2424 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
2425 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
2439 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],…
2440 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2441 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
2449 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],…
2450 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
2451 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
2467 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],…
2468 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
2469 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
2476 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
2477 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
2480 ; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
2493 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],…
2494 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2495 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
2503 ; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],…
2504 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
2505 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
2521 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],…
2522 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
2523 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],…
2530 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2531 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2534 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2535 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2549 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
2550 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2551 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
2552 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
2554 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
2561 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
2562 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
2563 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
2583 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2584 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
2586 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],…
2595 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
2596 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2610 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],…
2611 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
2613 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2624 ; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],…
2625 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
2626 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
2644 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],…
2671 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],…
2699 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],…
2700 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
2702 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2709 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2712 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2713 ; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
2714 ; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
2715 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2724 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2727 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2729 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
2753 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
2788 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
2789 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2809 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
2829 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2830 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
2848 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
2850 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2851 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2852 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
2853 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2859 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2860 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
2863 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2864 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2877 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
2893 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2894 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
2898 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
2901 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2926 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2928 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
2950 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,…
2951 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
2958 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2959 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
2961 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2962 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2974 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
2989 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2990 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
2992 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
2993 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2998 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2999 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
3001 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3002 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3011 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3028 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
3050 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,ze…
3051 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
3058 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3059 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3061 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3062 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3074 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
3089 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3090 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3092 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3093 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3098 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3099 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3101 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3102 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3111 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3128 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,ze…
3148 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],…
3149 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
3151 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
3158 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3159 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
3161 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
3162 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3174 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
3192 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
3193 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
3195 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
3202 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3203 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
3205 ; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
3206 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3218 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
3239 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
3240 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
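
For reference: the lines above are FileCheck assertions from an LLVM x86 vector-shuffle lowering test, each pinning the exact AVX1 or AVX2 instruction sequence emitted for a <16 x i16> shufflevector. A minimal sketch of the kind of test case behind them, assuming a standard llc RUN line (the RUN line and function name here are illustrative, not taken from this listing):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

; Illustrative case: splat word 0 across all 16 lanes. AVX2 lowers this to a
; single vpbroadcastw, the same pattern the AVX2 lines above check for; AVX1,
; which lacks vpbroadcastw, falls back to vpshuflw/vpshufhw/vpshufb splats.
; AVX2-LABEL: splat_v16i16:
; AVX2:       vpbroadcastw %xmm0, %ymm0
define <16 x i16> @splat_v16i16(<16 x i16> %a) {
  %s = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %s
}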