Lines Matching refs:AVX512

2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
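
All four RUN lines compile the same IR and differ only in the enabled AVX-512 features (avx512f alone, then avx512vl, avx512bw, and avx512bw+avx512vl, the latter three with fast-variable-shuffle). The blocks below all carry the shared AVX512 prefix, i.e. the generated code that is common to every configuration.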
155 ; AVX512-LABEL: shuffle_v64i8_to_v16i8_1:
156 ; AVX512: # %bb.0:
157 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
158 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
159 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
160 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
161 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u>
162 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
163 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
164 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
165 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
166 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
167 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
168 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
169 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
170 ; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
171 ; AVX512-NEXT: retq
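
For context, the IR being compiled is not part of the match set above. Below is a minimal reconstruction of shuffle_v64i8_to_v16i8_1, assuming the conventional load/shufflevector/store test shape; the indices are inferred from the <1,5,9,13> pshufb masks (every fourth byte of the 64-byte input, starting at offset 1), and the parameter names and nounwind attribute are illustrative:

; Sketch, not from the match set: strided extract <64 x i8> -> <16 x i8>, stride 4, offset 1.
define void @shuffle_v64i8_to_v16i8_1(<64 x i8>* %L, <16 x i8>* %S) nounwind {
  %vec = load <64 x i8>, <64 x i8>* %L
  %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef,
    <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29,
                i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61>
  store <16 x i8> %strided.vec, <16 x i8>* %S
  ret void
}

The checks above then pin down the expected lowering: four 16-byte loads, a per-lane vpshufb that packs the wanted bytes into one dword, two vpunpckldq merges, and a final vpblendd, with no cross-lane 512-bit shuffle.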
179 ; AVX512-LABEL: shuffle_v64i8_to_v16i8_2:
180 ; AVX512: # %bb.0:
181 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
182 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
183 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
184 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
185 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,u,u,2,6,10,14,u,u,u,u,u,u,u,u>
186 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
187 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
188 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
189 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u>
190 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
191 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
192 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
193 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
194 ; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
195 ; AVX512-NEXT: retq
203 ; AVX512-LABEL: shuffle_v64i8_to_v16i8_3:
204 ; AVX512: # %bb.0:
205 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
206 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
207 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
208 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
209 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,u,u,3,7,11,15,u,u,u,u,u,u,u,u>
210 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
211 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
212 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
213 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u>
214 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
215 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
216 ; AVX512-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
217 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
218 ; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
219 ; AVX512-NEXT: retq
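
shuffle_v64i8_to_v16i8_2 and _3 are the same test shifted by one byte each: the pshufb masks advance from <1,5,9,13> to <2,6,10,14> and <3,7,11,15>, so only the starting offset of the strided extract changes, while the load/unpack/blend scaffolding stays identical.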
404 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_1:
405 ; AVX512: # %bb.0:
406 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
407 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
408 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
409 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
410 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,1,9,u,u,u,u,u,u,u,u,u,u,u,u>
411 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
412 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
413 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
414 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <1,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
415 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
416 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
417 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
418 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
419 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
420 ; AVX512-NEXT: retq
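
The v8i8 variants keep one byte out of every eight, so each 16-byte lane contributes only two bytes. That is why the lowering switches from dword unpacks (vpunpckldq) to word unpacks (vpunpcklwd) and finishes with an 8-byte vmovq store. A matching sketch of shuffle_v64i8_to_v8i8_1, under the same assumed test shape, with indices inferred from the <1,9> pshufb masks:

; Sketch, not from the match set: strided extract <64 x i8> -> <8 x i8>, stride 8, offset 1.
define void @shuffle_v64i8_to_v8i8_1(<64 x i8>* %L, <8 x i8>* %S) nounwind {
  %vec = load <64 x i8>, <64 x i8>* %L
  %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef,
    <8 x i32> <i32 1, i32 9, i32 17, i32 25, i32 33, i32 41, i32 49, i32 57>
  store <8 x i8> %strided.vec, <8 x i8>* %S
  ret void
}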
428 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_2:
429 ; AVX512: # %bb.0:
430 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
431 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
432 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
433 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
434 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,2,10,u,u,u,u,u,u,u,u,u,u,u,u>
435 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
436 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
437 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
438 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <2,10,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
439 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
440 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
441 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
442 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
443 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
444 ; AVX512-NEXT: retq
452 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_3:
453 ; AVX512: # %bb.0:
454 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
455 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
456 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
457 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
458 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,3,11,u,u,u,u,u,u,u,u,u,u,u,u>
459 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
460 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
461 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
462 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <3,11,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
463 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
464 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
465 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
466 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
467 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
468 ; AVX512-NEXT: retq
476 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_4:
477 ; AVX512: # %bb.0:
478 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
479 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
480 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
481 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
482 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,4,12,u,u,u,u,u,u,u,u,u,u,u,u>
483 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
484 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
485 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
486 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <4,12,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
487 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
488 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
489 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
490 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
491 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
492 ; AVX512-NEXT: retq
500 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_5:
501 ; AVX512: # %bb.0:
502 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
503 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
504 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
505 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
506 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,5,13,u,u,u,u,u,u,u,u,u,u,u,u>
507 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
508 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
509 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
510 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <5,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
511 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
512 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
513 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
514 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
515 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
516 ; AVX512-NEXT: retq
524 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_6:
525 ; AVX512: # %bb.0:
526 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
527 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
528 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
529 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
530 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,6,14,u,u,u,u,u,u,u,u,u,u,u,u>
531 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
532 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
533 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
534 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <6,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
535 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
536 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
537 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
538 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
539 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
540 ; AVX512-NEXT: retq
548 ; AVX512-LABEL: shuffle_v64i8_to_v8i8_7:
549 ; AVX512: # %bb.0:
550 ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
551 ; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1
552 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2
553 ; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3
554 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <u,u,7,15,u,u,u,u,u,u,u,u,u,u,u,u>
555 ; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm3
556 ; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2
557 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
558 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = <7,15,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
559 ; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1
560 ; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0
561 ; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
562 ; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2,3]
563 ; AVX512-NEXT: vmovq %xmm0, (%rsi)
564 ; AVX512-NEXT: retq
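
shuffle_v64i8_to_v8i8_2 through _7 repeat this template with the starting offset advanced from 2 to 7: the per-lane pshufb masks are <N,N+8> for offset N, and everything else in the sequence is byte-for-byte the same.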