; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}
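
; A cross-lane VPERMQ followed by an in-lane PSHUFB should combine into a
; single cross-lane shuffle (VPERM2I128) or a blend with zero (VPBLENDD).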
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vpblendd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}
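
; Shuffle masks that splat a single element should combine into the matching
; VPBROADCASTB/W/D/Q or VBROADCASTSS/SD instruction.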
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}
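
; Variable shuffles that only move whole 64-bit elements should use the
; immediate forms VPERMQ/VPERMPD instead of VPERMD/VPERMPS with an index
; vector.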
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}
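
; PSHUFB masks that shuffle words within only the low or only the high half of
; each 128-bit lane should lower to VPSHUFLW or VPSHUFHW; byte-level shifts and
; masks that touch both halves remain a single VPSHUFB.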
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}