; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX512VBMI

define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) {
; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # %bb.0:
; ALL-NEXT: vpsrld $16, %xmm0, %xmm0
; ALL-NEXT: retq
  %b = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <64 x i8> %b
}

define <64 x i8> @shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512VBMI-NEXT: retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero
; AVX512VBMI-NEXT: retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 64, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 64, i32 64, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 64, i32 64, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 64>
  ret <64 x i8> %shuffle
}

define <64 x i8> @shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 =
AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 142; AVX512F-NEXT: retq 143; 144; AVX512BW-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 145; AVX512BW: # %bb.0: 146; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 147; AVX512BW-NEXT: retq 148; 149; AVX512DQ-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 150; AVX512DQ: # %bb.0: 151; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0 152; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 153; AVX512DQ-NEXT: retq 154; 155; AVX512VBMI-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: 156; AVX512VBMI: # %bb.0: 157; AVX512VBMI-NEXT: vpbroadcastb %xmm0, %zmm0 158; AVX512VBMI-NEXT: retq 159 %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 160 ret <64 x i8> %shuffle 161} 162 163define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00(<64 x i8> %a) { 164; AVX512F-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 165; AVX512F: # %bb.0: 166; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 167; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm2 168; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 169; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0 170; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 171; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] 172; AVX512F-NEXT: retq 173; 174; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 175; AVX512BW: # %bb.0: 176; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48] 177; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] 178; AVX512BW-NEXT: retq 179; 180; AVX512DQ-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 181; AVX512DQ: # %bb.0: 182; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 183; 
AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 184; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 185; AVX512DQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 186; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 187; AVX512DQ-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] 188; AVX512DQ-NEXT: retq 189; 190; AVX512VBMI-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00: 191; AVX512VBMI: # %bb.0: 192; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm1 = [63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] 193; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 194; AVX512VBMI-NEXT: retq 195 %shuffle = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 196 ret <64 x i8> %shuffle 197} 198 199; PR44379 200define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57(<64 x i8> %a) { 201; ALL-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57: 202; ALL: # %bb.0: 203; ALL-NEXT: vprolq $48, %zmm0, %zmm0 204; ALL-NEXT: retq 205 %shuffle = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 40, i32 41, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 56, i32 57> 206 ret <64 x i8> %shuffle 207} 208 209define <64 x i8> @insert_dup_mem_v64i8_i32(i32* %ptr) { 210; AVX512F-LABEL: insert_dup_mem_v64i8_i32: 211; AVX512F: # %bb.0: 212; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 213; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 214; AVX512F-NEXT: retq 215; 216; AVX512BW-LABEL: insert_dup_mem_v64i8_i32: 217; AVX512BW: # %bb.0: 218; AVX512BW-NEXT: vpbroadcastb (%rdi), %zmm0 219; AVX512BW-NEXT: retq 220; 221; AVX512DQ-LABEL: insert_dup_mem_v64i8_i32: 222; AVX512DQ: # %bb.0: 223; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 224; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 225; AVX512DQ-NEXT: retq 226; 227; AVX512VBMI-LABEL: insert_dup_mem_v64i8_i32: 228; AVX512VBMI: # %bb.0: 229; AVX512VBMI-NEXT: vpbroadcastb (%rdi), %zmm0 230; AVX512VBMI-NEXT: retq 231 %tmp = load i32, i32* %ptr, align 4 232 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 233 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x 
i8> 234 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <64 x i32> zeroinitializer 235 ret <64 x i8> %tmp3 236} 237 238define <64 x i8> @insert_dup_mem_v64i8_sext_i8(i8* %ptr) { 239; AVX512F-LABEL: insert_dup_mem_v64i8_sext_i8: 240; AVX512F: # %bb.0: 241; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0 242; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 243; AVX512F-NEXT: retq 244; 245; AVX512BW-LABEL: insert_dup_mem_v64i8_sext_i8: 246; AVX512BW: # %bb.0: 247; AVX512BW-NEXT: vpbroadcastb (%rdi), %zmm0 248; AVX512BW-NEXT: retq 249; 250; AVX512DQ-LABEL: insert_dup_mem_v64i8_sext_i8: 251; AVX512DQ: # %bb.0: 252; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0 253; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 254; AVX512DQ-NEXT: retq 255; 256; AVX512VBMI-LABEL: insert_dup_mem_v64i8_sext_i8: 257; AVX512VBMI: # %bb.0: 258; AVX512VBMI-NEXT: vpbroadcastb (%rdi), %zmm0 259; AVX512VBMI-NEXT: retq 260 %tmp = load i8, i8* %ptr, align 1 261 %tmp1 = sext i8 %tmp to i32 262 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 263 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 264 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <64 x i32> zeroinitializer 265 ret <64 x i8> %tmp4 266} 267 268define <64 x i8> @insert_dup_elt1_mem_v64i8_i32(i32* %ptr) { 269; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_i32: 270; AVX512F: # %bb.0: 271; AVX512F-NEXT: vpbroadcastb 1(%rdi), %ymm0 272; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 273; AVX512F-NEXT: retq 274; 275; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_i32: 276; AVX512BW: # %bb.0: 277; AVX512BW-NEXT: vpbroadcastb 1(%rdi), %zmm0 278; AVX512BW-NEXT: retq 279; 280; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_i32: 281; AVX512DQ: # %bb.0: 282; AVX512DQ-NEXT: vpbroadcastb 1(%rdi), %ymm0 283; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 284; AVX512DQ-NEXT: retq 285; 286; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_i32: 287; AVX512VBMI: # %bb.0: 288; AVX512VBMI-NEXT: vpbroadcastb 1(%rdi), %zmm0 289; AVX512VBMI-NEXT: retq 290 %tmp = load i32, i32* %ptr, align 4 291 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 292 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 293 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 294 ret <64 x i8> %tmp3 295} 296 297define <64 x i8> @insert_dup_elt3_mem_v64i8_i32(i32* %ptr) { 298; AVX512F-LABEL: insert_dup_elt3_mem_v64i8_i32: 299; AVX512F: # %bb.0: 300; AVX512F-NEXT: vpbroadcastb 3(%rdi), %ymm0 301; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 302; AVX512F-NEXT: retq 303; 304; AVX512BW-LABEL: insert_dup_elt3_mem_v64i8_i32: 305; AVX512BW: # %bb.0: 306; AVX512BW-NEXT: vpbroadcastb 3(%rdi), %zmm0 307; AVX512BW-NEXT: retq 308; 309; AVX512DQ-LABEL: insert_dup_elt3_mem_v64i8_i32: 310; AVX512DQ: # %bb.0: 311; AVX512DQ-NEXT: vpbroadcastb 3(%rdi), %ymm0 312; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 313; AVX512DQ-NEXT: retq 314; 315; AVX512VBMI-LABEL: insert_dup_elt3_mem_v64i8_i32: 316; AVX512VBMI: # %bb.0: 317; AVX512VBMI-NEXT: vpbroadcastb 3(%rdi), %zmm0 318; AVX512VBMI-NEXT: retq 319 %tmp = load i32, i32* %ptr, align 4 320 
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 321 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8> 322 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 323 ret <64 x i8> %tmp3 324} 325 326define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) { 327; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 328; AVX512F: # %bb.0: 329; AVX512F-NEXT: movsbl (%rdi), %eax 330; AVX512F-NEXT: shrl $8, %eax 331; AVX512F-NEXT: vmovd %eax, %xmm0 332; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 333; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 334; AVX512F-NEXT: retq 335; 336; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 337; AVX512BW: # %bb.0: 338; AVX512BW-NEXT: movsbl (%rdi), %eax 339; AVX512BW-NEXT: shrl $8, %eax 340; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0 341; AVX512BW-NEXT: retq 342; 343; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 344; AVX512DQ: # %bb.0: 345; AVX512DQ-NEXT: movsbl (%rdi), %eax 346; AVX512DQ-NEXT: shrl $8, %eax 347; AVX512DQ-NEXT: vmovd %eax, %xmm0 348; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0 349; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 350; AVX512DQ-NEXT: retq 351; 352; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_sext_i8: 353; AVX512VBMI: # %bb.0: 354; AVX512VBMI-NEXT: movsbl (%rdi), %eax 355; AVX512VBMI-NEXT: shrl $8, %eax 356; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0 357; AVX512VBMI-NEXT: retq 358 %tmp = load i8, i8* %ptr, align 1 359 %tmp1 = sext i8 %tmp to i32 360 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 361 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8> 362 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 363 ret <64 x i8> %tmp4 364} 365 366define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz(<64 x i8> %a) { 367; AVX512F-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz: 368; AVX512F: # %bb.0: 369; AVX512F-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 370; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 371; AVX512F-NEXT: vpmovzxbq {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 372; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 373; AVX512F-NEXT: retq 374; 375; AVX512BW-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz: 376; AVX512BW: # %bb.0: 377; AVX512BW-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 378; AVX512BW-NEXT: retq 379; 380; AVX512DQ-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz: 381; AVX512DQ: # %bb.0: 382; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 383; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 384; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 385; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 386; AVX512DQ-NEXT: retq 387; 388; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz: 389; AVX512VBMI: # %bb.0: 390; AVX512VBMI-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 391; AVX512VBMI-NEXT: retq 392 %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 393 ret <64 x i8> %shuffle 394} 395 396define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz(<64 x i8> %a) { 397; AVX512F-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz: 398; AVX512F: # %bb.0: 399; AVX512F-NEXT: vpmovzxbd {{.*#+}} ymm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 400; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 401; AVX512F-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 402; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 403; AVX512F-NEXT: retq 404; 405; AVX512BW-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz: 406; AVX512BW: # %bb.0: 407; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 408; AVX512BW-NEXT: retq 409; 410; AVX512DQ-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz: 411; AVX512DQ: # %bb.0: 412; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 413; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 414; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 415; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 416; AVX512DQ-NEXT: retq 417; 418; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz: 419; AVX512VBMI: # %bb.0: 420; AVX512VBMI-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 421; AVX512VBMI-NEXT: retq 422 %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 72, i32 0, i32 0, i32 0, i32 73, i32 0, i32 0, i32 0, i32 74, i32 0, i32 0, i32 0, i32 75, i32 0, i32 0, i32 0, i32 76, i32 0, i32 0, i32 0, i32 77, i32 0, i32 0, i32 0, i32 78, i32 0, i32 0, i32 0, i32 79, i32 0, i32 0, i32 0> 423 ret <64 x i8> %shuffle 424} 425 426define <64 x i8> 
@shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz(<64 x i8> %a) { 427; AVX512F-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 428; AVX512F: # %bb.0: 429; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 430; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 431; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 432; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 433; AVX512F-NEXT: retq 434; 435; AVX512BW-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 436; AVX512BW: # %bb.0: 437; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 438; AVX512BW-NEXT: retq 439; 440; AVX512DQ-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 441; AVX512DQ: # %bb.0: 442; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 443; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 444; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 445; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 446; AVX512DQ-NEXT: retq 447; 448; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz: 449; AVX512VBMI: # %bb.0: 450; AVX512VBMI-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 451; 
AVX512VBMI-NEXT: retq 452 %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 65, i32 0, i32 66, i32 0, i32 67, i32 0, i32 68, i32 0, i32 69, i32 0, i32 70, i32 0, i32 71, i32 0, i32 72, i32 0, i32 73, i32 0, i32 74, i32 0, i32 75, i32 0, i32 76, i32 0, i32 77, i32 0, i32 78, i32 0, i32 79, i32 0, i32 80, i32 0, i32 81, i32 0, i32 82, i32 0, i32 83, i32 0, i32 84, i32 0, i32 85, i32 0, i32 86, i32 0, i32 87, i32 0, i32 88, i32 0, i32 89, i32 0, i32 90, i32 0, i32 91, i32 0, i32 92, i32 0, i32 93, i32 0, i32 94, i32 0, i32 95, i32 0> 453 ret <64 x i8> %shuffle 454} 455 456define <64 x i8> @shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz(<64 x i8> %a) { 457; AVX512F-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 458; AVX512F: # %bb.0: 459; AVX512F-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 460; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128] 461; AVX512F-NEXT: vpshufb %ymm2, %ymm1, %ymm1 462; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 463; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 464; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 465; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 466; AVX512F-NEXT: retq 467; 468; AVX512BW-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 469; AVX512BW: # %bb.0: 470; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] 471; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero 472; AVX512BW-NEXT: retq 473; 474; AVX512DQ-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 475; AVX512DQ: # %bb.0: 476; AVX512DQ-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1] 477; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128] 478; AVX512DQ-NEXT: vpshufb %ymm2, %ymm1, %ymm1 479; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 480; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 481; AVX512DQ-NEXT: vpshufb %ymm2, %ymm0, %ymm0 482; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 483; AVX512DQ-NEXT: retq 484; 485; AVX512VBMI-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz: 486; AVX512VBMI: # %bb.0: 487; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1 488; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[63,65,61,67,59,69,57,71,55,73,53,75,51,77,49,79,47,81,45,83,43,85,41,87,39,89,37,91,35,93,33,95,31,97,29,99,27,101,25,103,23,105,21,107,19,109,17,111,15,113,13,115,11,117,9,119,7,121,5,123,3,125,1,127] 489; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 490; AVX512VBMI-NEXT: retq 491 %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 63, i32 64, i32 61, i32 64, i32 59, i32 64, i32 57, i32 64, i32 55, i32 64, i32 53, i32 64, i32 51, i32 64, i32 49, i32 64, i32 47, i32 64, i32 45, i32 64, i32 43, i32 64, i32 41, i32 64, i32 39, i32 64, i32 37, i32 64, i32 35, i32 64, i32 33, i32 64, i32 31, i32 64, i32 29, i32 64, i32 27, i32 64, i32 25, i32 64, i32 23, i32 64, i32 21, i32 64, i32 19, i32 64, i32 17, i32 64, i32 15, i32 64, i32 13, i32 64, i32 11, i32 64, i32 9, i32 64, i32 7, i32 64, i32 5, i32 64, i32 3, i32 64, i32 1, i32 64> 492 ret <64 x i8> %shuffle 493} 494 495define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126(<64 x i8> %a, <64 x i8> %b) { 496; AVX512F-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 497; AVX512F: # %bb.0: 498; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 499; AVX512F-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1] 500; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 501; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2 502; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14] 503; AVX512F-NEXT: vpshufb %ymm3, %ymm2, %ymm2 504; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 505; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 506; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 507; AVX512F-NEXT: vpshufb %ymm3, %ymm0, %ymm0 508; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 509; AVX512F-NEXT: retq 510; 511; AVX512BW-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 512; AVX512BW: # %bb.0: 513; AVX512BW-NEXT: vextracti64x4 $1, %zmm1, %ymm2 514; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1] 515; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm3 516; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 517; AVX512BW-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2 518; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14] 519; AVX512BW-NEXT: vpshufb %ymm3, %ymm2, %ymm2 520; AVX512BW-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 521; AVX512BW-NEXT: vpshufb %ymm3, %ymm0, %ymm0 522; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 523; AVX512BW-NEXT: retq 524; 525; AVX512DQ-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 526; AVX512DQ: # %bb.0: 527; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 528; AVX512DQ-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1] 529; AVX512DQ-NEXT: vpbroadcastw {{.*#+}} ymm4 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 530; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2 531; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14] 532; AVX512DQ-NEXT: vpshufb %ymm3, %ymm2, %ymm2 533; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 534; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] 535; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 536; AVX512DQ-NEXT: vpshufb %ymm3, %ymm0, %ymm0 537; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 538; AVX512DQ-NEXT: retq 539; 540; AVX512VBMI-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126: 541; AVX512VBMI: # %bb.0: 542; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [63,64,61,66,59,68,57,70,55,72,53,74,51,76,49,78,47,80,45,82,43,84,41,86,39,88,37,90,35,92,33,94,31,96,29,98,27,100,25,102,23,104,21,106,19,108,17,110,15,112,13,114,11,116,9,118,7,120,5,122,3,124,1,126] 543; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0 544; AVX512VBMI-NEXT: retq 545 %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 63, i32 64, i32 61, i32 66, i32 59, i32 68, i32 57, i32 70, i32 55, i32 72, i32 53, i32 74, i32 51, i32 76, i32 49, i32 78, i32 47, i32 80, i32 45, i32 82, i32 43, i32 84, i32 41, i32 86, i32 39, i32 88, i32 37, i32 90, i32 35, i32 92, i32 33, i32 94, i32 31, i32 96, i32 29, i32 98, i32 27, i32 100, i32 25, i32 102, i32 23, i32 104, i32 21, i32 106, i32 19, i32 108, i32 17, i32 110, i32 15, i32 112, i32 13, i32 114, i32 11, i32 116, i32 9, i32 118, i32 7, i32 120, i32 5, i32 122, i32 3, i32 124, i32 1, i32 126> 546 ret <64 x i8> %shuffle 547} 548 549define <64 x i8> @shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind { 550; AVX512F-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 551; AVX512F: # %bb.0: 552; AVX512F-NEXT: vpsrad $25, %zmm0, %zmm0 553; AVX512F-NEXT: vpsrad $25, %zmm1, %zmm1 554; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 555; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 556; AVX512F-NEXT: vpackssdw %ymm2, %ymm3, %ymm2 557; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 558; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 559; AVX512F-NEXT: retq 560; 561; AVX512BW-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 562; AVX512BW: # %bb.0: 563; AVX512BW-NEXT: vpsrad $25, %zmm0, %zmm0 564; AVX512BW-NEXT: vpsrad $25, %zmm1, %zmm1 565; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 566; AVX512BW-NEXT: retq 567; 568; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 569; AVX512DQ: # %bb.0: 570; AVX512DQ-NEXT: vpsrad $25, %zmm0, %zmm0 571; AVX512DQ-NEXT: vpsrad $25, %zmm1, %zmm1 572; 
AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 573; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 574; AVX512DQ-NEXT: vpackssdw %ymm2, %ymm3, %ymm2 575; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 576; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 577; AVX512DQ-NEXT: retq 578; 579; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125: 580; AVX512VBMI: # %bb.0: 581; AVX512VBMI-NEXT: vpsrad $25, %zmm0, %zmm0 582; AVX512VBMI-NEXT: vpsrad $25, %zmm1, %zmm1 583; AVX512VBMI-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 584; AVX512VBMI-NEXT: retq 585 %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 586 %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25> 587 %3 = bitcast <16 x i32> %1 to <64 x i8> 588 %4 = bitcast <16 x i32> %2 to <64 x i8> 589 %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 64, i32 65, i32 68, i32 69, i32 72, i32 73, i32 76, i32 77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 80, i32 81, i32 84, i32 85, i32 88, i32 89, i32 92, i32 93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 96, i32 97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125> 590 ret <64 x i8> %5 591} 592 593define <64 x i8> @shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind { 594; AVX512F-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 595; AVX512F: # %bb.0: 596; AVX512F-NEXT: vpsrad $25, %zmm0, %zmm0 597; AVX512F-NEXT: vpsrad $25, %zmm1, %zmm1 598; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 599; AVX512F-NEXT: vpacksswb %ymm2, %ymm2, %ymm2 600; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 601; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 602; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 603; AVX512F-NEXT: vpacksswb %ymm0, %ymm0, %ymm0 604; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 605; AVX512F-NEXT: retq 606; 607; AVX512BW-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 608; AVX512BW: # %bb.0: 609; AVX512BW-NEXT: vpsrad $25, %zmm0, %zmm0 610; AVX512BW-NEXT: vpsrad $25, %zmm1, %zmm1 611; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 612; AVX512BW-NEXT: vpacksswb %zmm0, %zmm0, %zmm0 613; AVX512BW-NEXT: retq 614; 615; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124: 616; AVX512DQ: # %bb.0: 617; 
AVX512DQ-NEXT: vpsrad $25, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrad $25, %zmm1, %zmm1
; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT: vpacksswb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpacksswb %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpsrad $25, %zmm0, %zmm0
; AVX512VBMI-NEXT: vpsrad $25, %zmm1, %zmm1
; AVX512VBMI-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT: vpacksswb %zmm0, %zmm0, %zmm0
; AVX512VBMI-NEXT: retq
  %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
  ret <64 x i8> %5
}

define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpackusdw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512VBMI-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512VBMI-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT: retq
  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 64, i32 65, i32 68, i32 69, i32 72, i32 73, i32 76, i32 77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 80, i32 81, i32 84, i32 85, i32 88, i32 89, i32 92, i32 93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 96, i32 97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125>
  ret <64 x i8> %5
}

define <64 x i8> @shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vpackuswb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_092_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vpsrld $25, %zmm0, %zmm0
; AVX512VBMI-NEXT: vpsrld $25, %zmm1, %zmm1
; AVX512VBMI-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT: vpackuswb %zmm0, %zmm0, %zmm0
; AVX512VBMI-NEXT: retq
  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
  ret <64 x i8> %5
}

define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7]
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[2,3,6,7],zmm1[2,3,6,7]
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[0,1,4,5]
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7]
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127]
; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT: retq
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}

define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm3
; AVX512F-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm3
; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,65,67,69,71,73,75,77,79,17,19,21,23,25,27,29,31,81,83,85,87,89,91,93,95,33,35,37,39,41,43,45,47,97,99,101,103,105,107,109,111,49,51,53,55,57,59,61,63,113,115,117,119,121,123,125,127]
; AVX512VBMI-NEXT: vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT: retq
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}