; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

; Codegen tests for shufflevector on <16 x i16>. The two llc invocations
; above differ only in -mattr: checks tagged AVX1 belong to the +avx run and
; checks tagged AVX2 belong to the +avx2 run.
;
; Every function name spells out its 16-entry shuffle mask, two digits per
; result lane: 00-15 select an element of %a, 16-31 select an element of %b.
;
; The check lines come from utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.

target triple = "x86_64-unknown-unknown"

; Every result lane takes element 0 of %a.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 of %a everywhere except one lane of the upper half of the result,
; which takes element 1..7 of %a.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Element 0 of %a everywhere except one lane of the lower half of the result,
; which takes element 8..15 of %a (i.e. an element from the upper half of %a).
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Single-input masks in which each half of the result reads only from the
; corresponding half of %a, with the same relative pattern in both halves.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,3,3,6,6,7,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
; AVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ret <16 x i16> %shuffle
}

; Single-input masks in which both halves of the result are the same
; 8-element pattern built only from elements 0..7 of %a.
define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}

; Two-input blends: result lane i takes element i of either %a (mask value i)
; or %b (mask value i+16); no element changes position.
define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,0]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [0,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,0,65535,0,65535,0,65535,0,65535]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [0,65535,0,65535,0,65535,0,65535,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL:
shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: 686; AVX2: # BB#0: 687; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0] 688; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 689; AVX2-NEXT: retq 690 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> 691 ret <16 x i16> %shuffle 692} 693 694define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) { 695; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 696; AVX1: # BB#0: 697; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 698; AVX1-NEXT: retq 699; 700; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31: 701; AVX2: # BB#0: 702; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7] 703; AVX2-NEXT: retq 704 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 705 ret <16 x i16> %shuffle 706} 707 708define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) { 709; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 710; AVX1: # BB#0: 711; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 712; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 713; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 714; AVX1-NEXT: retq 715; 716; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16: 717; AVX2: # BB#0: 718; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 719; AVX2-NEXT: vpbroadcastd 
%xmm0, %ymm0 720; AVX2-NEXT: retq 721 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16> 722 ret <16 x i16> %shuffle 723} 724 725define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) { 726; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 727; AVX1: # BB#0: 728; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 729; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 730; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 731; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 732; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 733; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 734; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 735; AVX1-NEXT: retq 736; 737; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: 738; AVX2: # BB#0: 739; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4] 740; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 741; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,1,1,4,4,5,5] 742; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 743; AVX2-NEXT: retq 744 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24> 745 ret <16 x i16> %shuffle 746} 747 748define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 749; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 750; AVX1: # BB#0: 751; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 752; AVX1-NEXT: vpshufd {{.*#+}} 
xmm2 = xmm2[2,3,0,1] 753; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 754; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7] 755; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 756; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 757; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 758; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 759; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 760; AVX1-NEXT: retq 761; 762; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: 763; AVX2: # BB#0: 764; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 765; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 766; AVX2-NEXT: retq 767 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15> 768 ret <16 x i16> %shuffle 769} 770 771define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) { 772; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 773; AVX1: # BB#0: 774; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 775; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7] 776; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 777; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] 778; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7] 779; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 780; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7] 781; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 782; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] 783; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 784; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 785; AVX1-NEXT: retq 786; 787; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: 788; AVX2: # BB#0: 789; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = 
ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 790; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 791; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 792; AVX2-NEXT: retq 793 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12> 794 ret <16 x i16> %shuffle 795} 796 797define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) { 798; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 799; AVX1: # BB#0: 800; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 801; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 802; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 803; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3] 804; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 805; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 806; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 807; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 808; AVX1-NEXT: retq 809; 810; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: 811; AVX2: # BB#0: 812; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] 813; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 814; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] 815; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 816; AVX2-NEXT: retq 817 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8> 818 ret <16 x i16> %shuffle 819} 820 821define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 
x i16> %b) { 822; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 823; AVX1: # BB#0: 824; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 825; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 826; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 827; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 828; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 829; AVX1-NEXT: retq 830; 831; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08: 832; AVX2: # BB#0: 833; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17] 834; AVX2-NEXT: retq 835 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8> 836 ret <16 x i16> %shuffle 837} 838 839define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) { 840; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 841; AVX1: # BB#0: 842; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 843; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 844; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 845; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 846; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 847; AVX1-NEXT: retq 848; 849; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08: 850; AVX2: # BB#0: 851; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17] 852; AVX2-NEXT: retq 853 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8> 854 ret <16 x i16> %shuffle 855} 856 857define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) { 858; AVX1-LABEL: 
shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 859; AVX1: # BB#0: 860; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 861; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 862; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 863; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 864; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 865; AVX1-NEXT: retq 866; 867; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08: 868; AVX2: # BB#0: 869; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17] 870; AVX2-NEXT: retq 871 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8> 872 ret <16 x i16> %shuffle 873} 874 875define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 876; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 877; AVX1: # BB#0: 878; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 879; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 880; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 881; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 882; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 883; AVX1-NEXT: retq 884; 885; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08: 886; AVX2: # BB#0: 887; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17] 888; AVX2-NEXT: retq 889 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8> 890 ret <16 x i16> %shuffle 891} 892 893define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 894; AVX1-LABEL: 
shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 895; AVX1: # BB#0: 896; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 897; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 898; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 899; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 900; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 901; AVX1-NEXT: retq 902; 903; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08: 904; AVX2: # BB#0: 905; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17] 906; AVX2-NEXT: retq 907 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8> 908 ret <16 x i16> %shuffle 909} 910 911define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 912; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 913; AVX1: # BB#0: 914; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 915; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 916; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 917; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 918; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 919; AVX1-NEXT: retq 920; 921; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08: 922; AVX2: # BB#0: 923; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17] 924; AVX2-NEXT: retq 925 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 926 ret <16 x i16> %shuffle 927} 928 929define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 930; AVX1-LABEL: 
shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 931; AVX1: # BB#0: 932; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 933; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 934; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 935; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 936; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 937; AVX1-NEXT: retq 938; 939; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08: 940; AVX2: # BB#0: 941; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17] 942; AVX2-NEXT: retq 943 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 944 ret <16 x i16> %shuffle 945} 946 947define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 948; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 949; AVX1: # BB#0: 950; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 951; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 952; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 953; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 954; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 955; AVX1-NEXT: retq 956; 957; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27: 958; AVX2: # BB#0: 959; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] 960; AVX2-NEXT: retq 961 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 962 ret <16 x i16> %shuffle 963} 964 965define <16 x i16> 
@shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 966; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 967; AVX1: # BB#0: 968; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 969; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 970; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 971; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 972; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 973; AVX1-NEXT: retq 974; 975; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31: 976; AVX2: # BB#0: 977; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] 978; AVX2-NEXT: retq 979 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 980 ret <16 x i16> %shuffle 981} 982 983define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) { 984; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 985; AVX1: # BB#0: 986; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 987; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 988; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 989; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 990; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 991; AVX1-NEXT: retq 992; 993; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: 994; AVX2: # BB#0: 995; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31] 996; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u] 997; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 998; AVX2-NEXT: retq 999 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 1000 ret <16 x i16> %shuffle 1001} 1002 1003define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) { 1004; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 1005; AVX1: # BB#0: 1006; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1007; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1008; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 1009; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1010; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1011; AVX1-NEXT: retq 1012; 1013; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: 1014; AVX2: # BB#0: 1015; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23] 1016; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u] 1017; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1018; AVX2-NEXT: retq 1019 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> 1020 ret <16 x i16> %shuffle 1021} 1022 1023define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x 
i16> %a, <16 x i16> %b) { 1024; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1025; AVX1: # BB#0: 1026; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1] 1027; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1028; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1029; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1030; AVX1-NEXT: retq 1031; 1032; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08: 1033; AVX2: # BB#0: 1034; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17] 1035; AVX2-NEXT: retq 1036 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1037 ret <16 x i16> %shuffle 1038} 1039 1040define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1041; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1042; AVX1: # BB#0: 1043; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1] 1044; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1045; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1] 1046; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1047; AVX1-NEXT: retq 1048; 1049; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08: 1050; AVX2: # BB#0: 1051; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17] 1052; AVX2-NEXT: retq 1053 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8> 1054 ret <16 x i16> %shuffle 1055} 1056 1057define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1058; 
AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1059; AVX1: # BB#0: 1060; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1] 1061; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1062; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1] 1063; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1064; AVX1-NEXT: retq 1065; 1066; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08: 1067; AVX2: # BB#0: 1068; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17] 1069; AVX2-NEXT: retq 1070 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8> 1071 ret <16 x i16> %shuffle 1072} 1073 1074define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1075; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1076; AVX1: # BB#0: 1077; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1] 1078; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1079; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1] 1080; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1081; AVX1-NEXT: retq 1082; 1083; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08: 1084; AVX2: # BB#0: 1085; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17] 1086; AVX2-NEXT: retq 1087 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8> 1088 ret <16 x i16> %shuffle 1089} 1090 1091define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) { 1092; AVX1-LABEL: 
shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1093; AVX1: # BB#0: 1094; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1] 1095; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1096; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1] 1097; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1098; AVX1-NEXT: retq 1099; 1100; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08: 1101; AVX2: # BB#0: 1102; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17] 1103; AVX2-NEXT: retq 1104 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8> 1105 ret <16 x i16> %shuffle 1106} 1107 1108define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1109; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1110; AVX1: # BB#0: 1111; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1] 1112; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1113; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1] 1114; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1115; AVX1-NEXT: retq 1116; 1117; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08: 1118; AVX2: # BB#0: 1119; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17] 1120; AVX2-NEXT: retq 1121 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8> 1122 ret <16 x i16> %shuffle 1123} 1124 1125define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1126; AVX1-LABEL: 
shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1127; AVX1: # BB#0: 1128; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1129; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1130; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15] 1131; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1132; AVX1-NEXT: retq 1133; 1134; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15: 1135; AVX2: # BB#0: 1136; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31] 1137; AVX2-NEXT: retq 1138 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15> 1139 ret <16 x i16> %shuffle 1140} 1141 1142define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1143; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1144; AVX1: # BB#0: 1145; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7] 1146; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 1147; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1148; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1] 1149; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1150; AVX1-NEXT: retq 1151; 1152; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08: 1153; AVX2: # BB#0: 1154; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17] 1155; AVX2-NEXT: retq 1156 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8> 1157 ret <16 x i16> %shuffle 1158} 1159 1160define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, 
<16 x i16> %b) { 1161; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1162; AVX1: # BB#0: 1163; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 1164; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1165; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1166; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1167; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1168; AVX1-NEXT: retq 1169; 1170; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12: 1171; AVX2: # BB#0: 1172; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25] 1173; AVX2-NEXT: retq 1174 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 1175 ret <16 x i16> %shuffle 1176} 1177 1178define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) { 1179; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1180; AVX1: # BB#0: 1181; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] 1182; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1183; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1184; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1] 1185; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1186; AVX1-NEXT: retq 1187; 1188; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08: 1189; AVX2: # BB#0: 1190; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17] 1191; AVX2-NEXT: retq 1192 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8> 1193 ret <16 x i16> %shuffle 1194} 1195 1196define <16 x i16> 
@shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) { 1197; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1198; AVX1: # BB#0: 1199; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1] 1200; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1201; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15] 1202; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1203; AVX1-NEXT: retq 1204; 1205; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15: 1206; AVX2: # BB#0: 1207; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31] 1208; AVX2-NEXT: retq 1209 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15> 1210 ret <16 x i16> %shuffle 1211} 1212 1213define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) { 1214; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1215; AVX1: # BB#0: 1216; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7] 1217; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6] 1218; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1219; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1] 1220; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1221; AVX1-NEXT: retq 1222; 1223; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08: 1224; AVX2: # BB#0: 1225; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17] 1226; AVX2-NEXT: retq 1227 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8> 1228 ret 
<16 x i16> %shuffle 1229} 1230 1231define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1232; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1233; AVX1: # BB#0: 1234; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3] 1235; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1236; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1237; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7] 1238; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1239; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1240; AVX1-NEXT: retq 1241; 1242; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12: 1243; AVX2: # BB#0: 1244; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25] 1245; AVX2-NEXT: retq 1246 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12> 1247 ret <16 x i16> %shuffle 1248} 1249 1250define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1251; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1252; AVX1: # BB#0: 1253; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1254; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1255; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1256; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1257; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1258; AVX1-NEXT: retq 1259; 1260; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: 1261; AVX2: # BB#0: 1262; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1263; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1264; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = 
ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1265; AVX2-NEXT: retq 1266 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1267 ret <16 x i16> %shuffle 1268} 1269 1270define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) { 1271; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1272; AVX1: # BB#0: 1273; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1274; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1275; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1276; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1277; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1278; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1279; AVX1-NEXT: retq 1280; 1281; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: 1282; AVX2: # BB#0: 1283; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1284; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1285; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1286; AVX2-NEXT: retq 1287 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20> 1288 ret <16 x i16> %shuffle 1289} 1290 1291define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1292; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1293; AVX1: # BB#0: 1294; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1295; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1296; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1297; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1298; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = 
xmm1[0,0,0,0,4,5,6,7] 1299; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1300; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1301; AVX1-NEXT: retq 1302; 1303; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: 1304; AVX2: # BB#0: 1305; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1306; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1307; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1308; AVX2-NEXT: retq 1309 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1310 ret <16 x i16> %shuffle 1311} 1312 1313define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) { 1314; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1315; AVX1: # BB#0: 1316; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1317; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 1318; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1319; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1320; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1321; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1322; AVX1-NEXT: retq 1323; 1324; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: 1325; AVX2: # BB#0: 1326; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1327; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] 1328; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12] 1329; AVX2-NEXT: retq 1330 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28> 1331 ret <16 x i16> %shuffle 1332} 1333 1334define <16 x i16> 
@shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) { 1335; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1336; AVX1: # BB#0: 1337; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1338; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1339; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1340; AVX1-NEXT: retq 1341; 1342; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: 1343; AVX2: # BB#0: 1344; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1345; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1346; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1347; AVX2-NEXT: retq 1348 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1349 ret <16 x i16> %shuffle 1350} 1351 1352define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) { 1353; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1354; AVX1: # BB#0: 1355; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1356; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1357; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1] 1358; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1359; AVX1-NEXT: retq 1360; 1361; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24: 1362; AVX2: # BB#0: 1363; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17] 
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}

; Per 128-bit half: elements 1-7 of that half of %a move down one slot and the
; top slot becomes zero. The first shufflevector operand is zeroinitializer, so
; mask index 0 yields a zero ("zz" in the name) while indices 17-23 and 25-31
; select elements 1-7 and 9-15 of %a. The checks expect a two-byte vpsrldq:
; once per xmm half on AVX1, one ymm instruction on AVX2.
define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
  ret <16 x i16> %shuffle
}

;
; Shuffle to logical bit shifts
;

; Zero in every even slot; the even-numbered elements of %a (0, 2, ..., 14)
; move up into the following odd slot. Here %a is the first operand, so mask
; index 16 selects from the zeroinitializer second operand. The checks expect
; a 16-bit left shift of each 32-bit element (vpslld $16).
define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; AVX1: # BB#0:
; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
  ret <16 x i16> %shuffle
}

; Within each group of four i16 slots: three zeros followed by the group's
; first element of %a (elements 0, 4, 8, 12). Mask index 16 selects from the
; zeroinitializer second operand. The checks expect a 48-bit left shift of
; each 64-bit element (vpsllq $48).
define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; AVX1: # BB#0:
; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; AVX2: # BB#0:
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
  ret <16 x i16> %shuffle
}

; The odd-numbered elements of %a (1, 3, ..., 15) move down into the preceding
; even slot and every odd slot becomes zero (mask index 16 = zeroinitializer
; operand). The checks expect a 16-bit logical right shift of each 32-bit
; element (vpsrld $16).
define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
  ret <16 x i16> %shuffle
}

; Within each group of four i16 slots: the upper two elements of %a (2,3 /
; 6,7 / 10,11 / 14,15) move to the lower two slots and the upper two slots
; become zero. The AVX2 checks expect a single vpsrlq $32. The AVX1 checks do
; not use a shift here; they pin a vxorps + vshufps + vpermilps sequence on
; the full ymm register.
define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
  ret <16 x i16> %shuffle
}

; Zero-extends the low four i16 elements of %a to 64 bits each. The operands
; are (zeroinitializer, %a), so mask indices 16-19 select elements 0-3 of %a
; and mask index 0 selects a zero. The checks expect vpmovzxwq: two xmm
; instructions (with a vpshufd in between) joined by vinsertf128 on AVX1, one
; ymm instruction on AVX2.
define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0>
  ret <16 x i16> %shuffle
}

; Zero-extends the low eight i16 elements of %a to 32 bits each. The operands
; are (zeroinitializer, %a), so mask index 16+k selects element k of %a and
; mask index 0 selects a zero.
; NOTE(review): the function name ends in "22_zz_22_zz", but the shuffle mask
; ends in "i32 22, i32 0, i32 23, i32 0", so the last pair is element 23 of
; the concatenated operands (the xmm0[7] lane in the AVX2 vpmovzxwd check).
; The name looks like a typo for "..._22_zz_23_zz". It is left unchanged here
; because the label checks match on it.
define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
  ret <16 x i16> %shuffle
}

; Per 128-bit half: the last element of that half of %b (mask index 23 / 31)
; followed by the first seven elements of the same half of %a. The checks
; expect a two-source vpalignr: per xmm half on AVX1 (after extracting both
; high halves), one ymm instruction on AVX2.
define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
; AVX2: # BB#0:
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

; Per 128-bit half: elements 1-7 of that half of %a followed by the first
; element of the same half of %b (mask index 16 / 24). The checks expect a
; two-source vpalignr: per xmm half on AVX1, one ymm instruction on AVX2.
define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
; AVX2: # BB#0:
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
  ret <16 x i16> %shuffle
}

; Operand-swapped counterpart of the test above. Per 128-bit half: elements
; 1-7 of that half of %b (mask indices 17-23 / 25-31) followed by the first
; element of the same half of %a (mask index 0 / 8). The checks expect a
; two-source vpalignr: per xmm half on AVX1, one ymm instruction on AVX2.
; NOTE(review): the naming is inconsistent with the sibling tests. The final
; element in the name is written "8" rather than zero-padded "08", and the
; mask spells index zero as "i32 00". The checks place bytes [0,1] of ymm0 at
; that position, so "00" is being read as 0. The name is left unchanged
; because the label checks match on it.
define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
; AVX2: # BB#0:
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29,
i32 30, i32 31, i32 8> 1542 ret <16 x i16> %shuffle 1543} 1544 1545define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) { 1546; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 1547; AVX1: # BB#0: 1548; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1549; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1550; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1551; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1552; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1553; AVX1-NEXT: retq 1554; 1555; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30: 1556; AVX2: # BB#0: 1557; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1558; AVX2-NEXT: retq 1559 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 1560 ret <16 x i16> %shuffle 1561} 1562 1563define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) { 1564; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 1565; AVX1: # BB#0: 1566; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 1567; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 1568; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1569; AVX1-NEXT: retq 1570; 1571; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: 1572; AVX2: # BB#0: 1573; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1574; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17] 1575; AVX2-NEXT: retq 1576 %shuffle = shufflevector <16 x i16> %a, <16 x i16> 
%b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16> 1577 ret <16 x i16> %shuffle 1578} 1579 1580define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) { 1581; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 1582; AVX1: # BB#0: 1583; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1584; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1585; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1586; AVX1-NEXT: retq 1587; 1588; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: 1589; AVX2: # BB#0: 1590; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1591; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29] 1592; AVX2-NEXT: retq 1593 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22> 1594 ret <16 x i16> %shuffle 1595} 1596 1597define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) { 1598; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: 1599; AVX1: # BB#0: 1600; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1601; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1602; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7] 1603; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7] 1604; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1605; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1606; AVX1-NEXT: retq 1607; 1608; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11: 1609; AVX2: # BB#0: 1610; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1611; AVX2-NEXT: 
vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1612; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7] 1613; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7] 1614; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1615; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1616; AVX2-NEXT: retq 1617 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11> 1618 ret <16 x i16> %shuffle 1619} 1620 1621define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) { 1622; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09: 1623; AVX1: # BB#0: 1624; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1625; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 1626; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 1627; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0] 1628; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1629; AVX1-NEXT: retq 1630; 1631; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09: 1632; AVX2: # BB#0: 1633; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1634; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] 1635; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] 1636; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0] 1637; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1638; AVX2-NEXT: retq 1639 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9> 1640 ret <16 x i16> %shuffle 1641} 1642 1643define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) { 1644; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27: 1645; AVX1: # BB#0: 1646; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm2 1647; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1648; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7] 1649; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 1650; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15] 1651; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1652; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1653; AVX1-NEXT: retq 1654; 1655; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27: 1656; AVX2: # BB#0: 1657; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1658; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1659; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 1660; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1661; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1662; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1663; AVX2-NEXT: retq 1664 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27> 1665 ret <16 x i16> %shuffle 1666} 1667 1668define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1669; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: 1670; AVX1: # BB#0: 1671; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1672; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1673; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3] 1674; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1675; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 1676; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1677; AVX1-NEXT: retq 1678; 1679; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08: 1680; AVX2: # BB#0: 1681; 
AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1682; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1683; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3] 1684; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 1685; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1686; AVX2-NEXT: retq 1687 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1688 ret <16 x i16> %shuffle 1689} 1690 1691define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 1692; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: 1693; AVX1: # BB#0: 1694; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1695; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 1696; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1697; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7] 1698; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1699; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1700; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1701; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1702; AVX1-NEXT: retq 1703; 1704; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12: 1705; AVX2: # BB#0: 1706; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1707; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 1708; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1709; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7] 1710; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1711; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1712; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4] 1713; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1714; AVX2-NEXT: retq 1715 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, 
i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 1716 ret <16 x i16> %shuffle 1717} 1718 1719define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) { 1720; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: 1721; AVX1: # BB#0: 1722; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1723; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1724; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1725; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 1726; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 1727; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1728; AVX1-NEXT: retq 1729; 1730; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11: 1731; AVX2: # BB#0: 1732; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1733; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1734; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1735; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 1736; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 1737; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1738; AVX2-NEXT: retq 1739 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11> 1740 ret <16 x i16> %shuffle 1741} 1742 1743define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) { 1744; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: 1745; AVX1: # BB#0: 1746; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1747; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = 
xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1748; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1749; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 1750; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 1751; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1752; AVX1-NEXT: retq 1753; 1754; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15: 1755; AVX2: # BB#0: 1756; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1757; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1758; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1759; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7] 1760; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7] 1761; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1762; AVX2-NEXT: retq 1763 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15> 1764 ret <16 x i16> %shuffle 1765} 1766 1767define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) { 1768; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13: 1769; AVX1: # BB#0: 1770; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1771; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 1772; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 1773; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 1774; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7] 1775; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2] 1776; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1777; AVX1-NEXT: retq 1778; 1779; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13: 1780; AVX2: # BB#0: 1781; AVX2-NEXT: 
vextracti128 $1, %ymm0, %xmm1 1782; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 1783; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] 1784; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 1785; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7] 1786; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2] 1787; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1788; AVX2-NEXT: retq 1789 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13> 1790 ret <16 x i16> %shuffle 1791} 1792 1793define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 1794; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: 1795; AVX1: # BB#0: 1796; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1797; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 1798; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15] 1799; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1800; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 1801; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1802; AVX1-NEXT: retq 1803; 1804; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08: 1805; AVX2: # BB#0: 1806; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1807; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2 1808; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15] 1809; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1810; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1] 1811; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1812; AVX2-NEXT: retq 1813 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, 
i32 12, i32 8, i32 8, i32 8, i32 8> 1814 ret <16 x i16> %shuffle 1815} 1816 1817define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) { 1818; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13: 1819; AVX1: # BB#0: 1820; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1821; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 1822; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 1823; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2] 1824; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1825; AVX1-NEXT: retq 1826; 1827; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13: 1828; AVX2: # BB#0: 1829; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1830; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 1831; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 1832; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2] 1833; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1834; AVX2-NEXT: retq 1835 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13> 1836 ret <16 x i16> %shuffle 1837} 1838 1839define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) { 1840; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13: 1841; AVX1: # BB#0: 1842; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1843; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 1844; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 1845; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 1846; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7] 1847; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2] 1848; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1849; AVX1-NEXT: retq 1850; 1851; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13: 
1852; AVX2: # BB#0: 1853; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1854; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 1855; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7] 1856; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2] 1857; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7] 1858; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2] 1859; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1860; AVX2-NEXT: retq 1861 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13> 1862 ret <16 x i16> %shuffle 1863} 1864 1865define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) { 1866; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15: 1867; AVX1: # BB#0: 1868; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1869; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 1870; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1871; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3] 1872; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7] 1873; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1874; AVX1-NEXT: retq 1875; 1876; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15: 1877; AVX2: # BB#0: 1878; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1879; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] 1880; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 1881; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3] 1882; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7] 1883; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1884; AVX2-NEXT: retq 1885 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15> 1886 ret <16 x i16> %shuffle 1887} 1888 1889define <16 x 
i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) { 1890; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08: 1891; AVX1: # BB#0: 1892; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1893; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 1894; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 1895; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] 1896; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1897; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1898; AVX1-NEXT: retq 1899; 1900; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08: 1901; AVX2: # BB#0: 1902; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1903; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] 1904; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 1905; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] 1906; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1907; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 1908; AVX2-NEXT: retq 1909 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8> 1910 ret <16 x i16> %shuffle 1911} 1912 1913define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) { 1914; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: 1915; AVX1: # BB#0: 1916; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1917; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 1918; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3] 1919; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1920; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 1921; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1922; AVX1-NEXT: retq 1923; 1924; AVX2-LABEL: 
shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08: 1925; AVX2: # BB#0: 1926; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1927; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2 1928; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3] 1929; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1930; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1] 1931; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1932; AVX2-NEXT: retq 1933 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8> 1934 ret <16 x i16> %shuffle 1935} 1936 1937define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) { 1938; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: 1939; AVX1: # BB#0: 1940; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1941; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 1942; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3] 1943; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1944; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 1945; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1946; AVX1-NEXT: retq 1947; 1948; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08: 1949; AVX2: # BB#0: 1950; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1951; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2 1952; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3] 1953; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1954; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1] 1955; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1956; AVX2-NEXT: retq 1957 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, 
i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8> 1958 ret <16 x i16> %shuffle 1959} 1960 1961define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) { 1962; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: 1963; AVX1: # BB#0: 1964; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1965; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 1966; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3] 1967; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1968; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 1969; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1970; AVX1-NEXT: retq 1971; 1972; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12: 1973; AVX2: # BB#0: 1974; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1975; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 1976; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3] 1977; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1978; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9] 1979; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1980; AVX2-NEXT: retq 1981 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12> 1982 ret <16 x i16> %shuffle 1983} 1984 1985define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) { 1986; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: 1987; AVX1: # BB#0: 1988; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1989; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 1990; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3] 1991; AVX1-NEXT: vpblendw 
{{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 1992; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 1993; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1994; AVX1-NEXT: retq 1995; 1996; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08: 1997; AVX2: # BB#0: 1998; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1999; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2 2000; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3] 2001; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2002; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1] 2003; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2004; AVX2-NEXT: retq 2005 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8> 2006 ret <16 x i16> %shuffle 2007} 2008 2009define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) { 2010; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: 2011; AVX1: # BB#0: 2012; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2013; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2014; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3] 2015; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2016; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 2017; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2018; AVX1-NEXT: retq 2019; 2020; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12: 2021; AVX2: # BB#0: 2022; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2023; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2024; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3] 2025; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2026; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9] 2027; AVX2-NEXT: vinserti128 $1, 
%xmm1, %ymm0, %ymm0 2028; AVX2-NEXT: retq 2029 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12> 2030 ret <16 x i16> %shuffle 2031} 2032 2033define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) { 2034; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11: 2035; AVX1: # BB#0: 2036; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2037; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 2038; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2039; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2040; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2041; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2042; AVX1-NEXT: retq 2043; 2044; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11: 2045; AVX2: # BB#0: 2046; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2047; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] 2048; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2049; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2050; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2051; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2052; AVX2-NEXT: retq 2053 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11> 2054 ret <16 x i16> %shuffle 2055} 2056 2057define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) { 2058; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11: 2059; AVX1: # BB#0: 2060; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2061; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 2062; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2063; AVX1-NEXT: vpblendw {{.*#+}} 
xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2064; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2065; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2066; AVX1-NEXT: retq 2067; 2068; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11: 2069; AVX2: # BB#0: 2070; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2071; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] 2072; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2073; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] 2074; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2075; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2076; AVX2-NEXT: retq 2077 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11> 2078 ret <16 x i16> %shuffle 2079} 2080 2081define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) { 2082; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13: 2083; AVX1: # BB#0: 2084; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2085; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 2086; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2087; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 2088; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2089; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2090; AVX1-NEXT: retq 2091; 2092; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13: 2093; AVX2: # BB#0: 2094; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2095; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] 2096; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2097; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] 2098; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2099; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2100; AVX2-NEXT: retq 2101 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 
x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13> 2102 ret <16 x i16> %shuffle 2103} 2104 2105define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) { 2106; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: 2107; AVX1: # BB#0: 2108; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2109; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 2110; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2111; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7] 2112; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2113; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 2114; AVX1-NEXT: retq 2115; 2116; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: 2117; AVX2: # BB#0: 2118; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2119; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] 2120; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3 2121; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] 2122; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2123; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 2124; AVX2-NEXT: retq 2125 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11> 2126 ret <16 x i16> %shuffle 2127} 2128 2129define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2130; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: 2131; AVX1: # BB#0: 2132; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2133; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2134; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15] 2135; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2136; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 2137; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2138; AVX1-NEXT: retq 2139; 2140; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12: 2141; AVX2: # BB#0: 2142; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2143; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2144; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15] 2145; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2146; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9] 2147; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2148; AVX2-NEXT: retq 2149 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> 2150 ret <16 x i16> %shuffle 2151} 2152 2153define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2154; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: 2155; AVX1: # BB#0: 2156; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2157; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2158; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15] 2159; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2160; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 2161; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2162; AVX1-NEXT: retq 2163; 2164; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: 2165; AVX2: # BB#0: 2166; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2167; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2168; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15] 2169; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2170; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] 2171; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2172; AVX2-NEXT: retq 2173 %shuffle = shufflevector <16 x 
i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12> 2174 ret <16 x i16> %shuffle 2175} 2176 2177define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2178; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: 2179; AVX1: # BB#0: 2180; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2181; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2182; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2183; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2184; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2185; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2186; AVX1-NEXT: retq 2187; 2188; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: 2189; AVX2: # BB#0: 2190; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2191; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2192; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2193; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2194; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2195; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2196; AVX2-NEXT: retq 2197 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12> 2198 ret <16 x i16> %shuffle 2199} 2200 2201define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) { 2202; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: 2203; AVX1: # BB#0: 2204; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2205; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] 2206; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15] 2207; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2208; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] 2209; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2210; AVX1-NEXT: retq 2211; 2212; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: 2213; AVX2: # BB#0: 2214; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2215; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2 2216; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15] 2217; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2218; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] 2219; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2220; AVX2-NEXT: retq 2221 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8> 2222 ret <16 x i16> %shuffle 2223} 2224 2225define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) { 2226; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: 2227; AVX1: # BB#0: 2228; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2229; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2230; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 2231; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2232; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2233; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7] 2234; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2235; AVX1-NEXT: retq 2236; 2237; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15: 2238; AVX2: # BB#0: 2239; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2240; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2241; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7] 2242; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] 2243; 
AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2244; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7] 2245; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2246; AVX2-NEXT: retq 2247 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15> 2248 ret <16 x i16> %shuffle 2249} 2250 2251define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2252; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: 2253; AVX1: # BB#0: 2254; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2255; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2256; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15] 2257; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2258; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] 2259; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2260; AVX1-NEXT: retq 2261; 2262; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12: 2263; AVX2: # BB#0: 2264; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2265; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2266; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15] 2267; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2268; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9] 2269; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2270; AVX2-NEXT: retq 2271 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12> 2272 ret <16 x i16> %shuffle 2273} 2274 2275define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2276; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: 2277; AVX1: # BB#0: 2278; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2279; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2280; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2281; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2282; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2283; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2284; AVX1-NEXT: retq 2285; 2286; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12: 2287; AVX2: # BB#0: 2288; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2289; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2290; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2291; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2292; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2293; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2294; AVX2-NEXT: retq 2295 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12> 2296 ret <16 x i16> %shuffle 2297} 2298 2299define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) { 2300; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: 2301; AVX1: # BB#0: 2302; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2303; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2 2304; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] 2305; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] 2306; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] 2307; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2308; AVX1-NEXT: retq 2309; 2310; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12: 2311; AVX2: # BB#0: 2312; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2313; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 2314; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i16> %shuffle
}

; Single-input shuffle (only %a is referenced by the mask). Both 8-element
; lanes use the same in-lane pattern <0,1,2,7>; elements 4-7 of each lane are
; undef.
define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Single-input shuffle (only %a is referenced). Elements 0-3 of each lane are
; undef; elements 4-6 of each lane stay in place. The last element of both
; lanes reads %a[11]: in-lane for result element 15, but across the lane
; boundary for result element 7.
define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
  ret <16 x i16> %shuffle
}

; Single-input shuffle (only %a is referenced). Both lanes use the same
; in-lane pattern <4,5,6,3>; elements 4-7 of each lane are undef.
define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Shuffle of %a against zeroinitializer: mask index 16 selects a zero element,
; so result elements 1 and 3 are zero ("zz" in the name). Element 0 reads
; %a[1], element 5 is undef, and elements 2, 4 and 6-15 keep their position.
define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
; AVX1: # BB#0:
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[8,9,u,u,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 2, i32 16, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %shuffle
}

; Single-input shuffle (only %a is referenced). The high lane uses the in-lane
; pattern <0,1,2,7,4,5,6,3>; the low lane matches it except that result
; element 7 reads %a[11] from the high lane instead of %a[3].
define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
  ret <16 x i16> %shuffle
}

; Single-input shuffle (only %a is referenced). The high lane uses the in-lane
; pattern <4,5,6,3,0,1,2,7>; the low lane matches it except that result
; element 7 reads %a[15] from the high lane instead of %a[7].
define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
  ret <16 x i16> %shuffle
}

; Single-input shuffle (only %a is referenced). The high lane uses the in-lane
; pattern <3,7,1,0,2,7,3,5>; the low lane matches it except that result
; element 7 reads %a[13] from the high lane instead of %a[5].
define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves the low four elements of each lane of %a with
; the low four elements of the same lane of %b (%a first), except that result
; element 7 uses mask index 27 (%b[11], high lane) instead of 19 (%b[3]).
define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves the low four elements of each lane of %a with
; the high four elements of the same lane of %b (%a first), except that result
; element 7 uses mask index 31 (%b[15], high lane) instead of 23 (%b[7]).
define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves the high four elements of each lane of %a
; with the high four elements of the same lane of %b (%a first), except that
; result element 7 uses mask index 31 (%b[15], high lane) instead of 23
; (%b[7]).
define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves the high four elements of each lane of %a
; with the low four elements of the same lane of %b (%a first), except that
; result element 7 uses mask index 27 (%b[11], high lane) instead of 19
; (%b[3]).
define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves in-lane elements 0,1,6,7 of %a with the same
; in-lane elements of %b (%a first), except that result element 7 uses mask
; index 31 (%b[15], high lane) instead of 23 (%b[7]).
define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: alternates in-lane elements 0,1,6,7 of %a with in-lane
; elements 4,5,0,1 of %b (%a first), except that result element 7 uses mask
; index 25 (%b[9], high lane) instead of 17 (%b[1]).
define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: per lane the pattern is <a1,a0,b1,b0,a3,a2,b3,b2>
; (in-lane indices), except that result element 7 uses mask index 26 (%b[10],
; high lane) instead of 18 (%b[2]).
define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves the low four elements of each lane of %b with
; the low four elements of the same lane of %a (%b first), except that result
; element 7 uses mask index 11 (%a[11], high lane) instead of 3 (%a[3]).
define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: interleaves the high four elements of each lane of %b
; with the high four elements of the same lane of %a (%b first), except that
; result element 7 uses mask index 15 (%a[15], high lane) instead of 7
; (%a[7]).
define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: per lane the pattern is <a0,a2,a1,a3,b4,b6,b5,b7>
; (in-lane indices), except that result element 7 uses mask index 31 (%b[15],
; high lane) instead of 23 (%b[7]).
define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
  ret <16 x i16> %shuffle
}

; Two-input shuffle with the same in-lane pattern in both lanes:
; <a4,a4,a3,b2>; elements 4-7 of each lane are undef.
define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Two-input shuffle with the same in-lane pattern in both lanes:
; <a0,a3,a2,b5>; elements 4-7 of each lane are undef.
define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Almost entirely undef mask: only result elements 3 and 11 are defined, and
; they read %b[5] (index 21) and %b[13] (index 29). %a is not referenced.
define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Two-input shuffle with the same in-lane pattern in both lanes:
; <a0,a1,a2,b5>; elements 4-7 of each lane are undef.
define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Two-input shuffle. Elements 0-3 of each lane are undef; elements 4-6 of each
; lane come from the same position in %b. The last element of both lanes reads
; %a[11]: in-lane for result element 15, across the lane boundary for result
; element 7.
define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
  ret <16 x i16> %shuffle
}

; Two-input shuffle with the same in-lane pattern in both lanes:
; <b4,b5,b6,a3>; elements 4-7 of each lane are undef.
define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: per lane the pattern is <a0,a1,a2,b5,b4,b5,b6,a3>
; (in-lane indices), except that result element 7 uses mask index 11 (%a[11],
; high lane) instead of 3 (%a[3]).
define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
  ret <16 x i16> %shuffle
}

; Two-input shuffle: per lane a position-preserving blend
; <a0,b1,a2,a3,b4,b5,b6,a7> (in-lane indices), except that result element 7
; uses mask index 15 (%a[15], high lane) instead of 7 (%a[7]).
define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
  ret <16 x i16> %shuffle
}

; Two-input shuffle with several undef elements: per lane the pattern is
; <u,u,u,a1,u,a5,a7,b1> (in-lane indices), except that result element 7 uses
; mask index 25 (%b[9], high lane) instead of 17 (%b[1]).
define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
  ret <16 x i16> %shuffle
}

; Two-input shuffle with the same in-lane pattern in both lanes:
; <u,u,a4,u,b0,b2,b4,u>, where u marks an undef element.
define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
  ret <16 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
;
AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 2998; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2999; AVX2-NEXT: retq 3000 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12> 3001 ret <16 x i16> %shuffle 3002} 3003 3004define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3005; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 3006; AVX1: # BB#0: 3007; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3008; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3009; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3010; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9] 3011; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3012; AVX1-NEXT: retq 3013; 3014; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu: 3015; AVX2: # BB#0: 3016; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25] 3017; AVX2-NEXT: retq 3018 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3019 ret <16 x i16> %shuffle 3020} 3021 3022define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) { 3023; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 3024; AVX1: # BB#0: 3025; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3026; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3027; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3028; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = 
xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3029; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3030; AVX1-NEXT: retq 3031; 3032; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12: 3033; AVX2: # BB#0: 3034; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3035; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3036; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3037; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3038; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3039; AVX2-NEXT: retq 3040 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12> 3041 ret <16 x i16> %shuffle 3042} 3043 3044define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3045; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3046; AVX1: # BB#0: 3047; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3048; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3049; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3050; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3051; AVX1-NEXT: retq 3052; 3053; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu: 3054; AVX2: # BB#0: 3055; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25] 3056; AVX2-NEXT: retq 3057 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3058 ret <16 x i16> %shuffle 3059} 3060 3061define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) { 3062; 
AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3063; AVX1: # BB#0: 3064; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3065; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3066; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9] 3067; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3068; AVX1-NEXT: retq 3069; 3070; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu: 3071; AVX2: # BB#0: 3072; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25] 3073; AVX2-NEXT: retq 3074 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef> 3075 ret <16 x i16> %shuffle 3076} 3077 3078define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3079; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3080; AVX1: # BB#0: 3081; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3082; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3083; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3084; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3085; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3086; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3087; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7] 3088; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3089; AVX1-NEXT: retq 3090; 3091; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: 3092; AVX2: # BB#0: 3093; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = 
ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15] 3094; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3095; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3096; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3097; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3098; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3099; AVX2-NEXT: retq 3100 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10> 3101 ret <16 x i16> %shuffle 3102} 3103 3104define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3105; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3106; AVX1: # BB#0: 3107; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3108; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 3109; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3110; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5] 3111; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3112; AVX1-NEXT: retq 3113; 3114; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu: 3115; AVX2: # BB#0: 3116; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21] 3117; AVX2-NEXT: retq 3118 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef> 3119 ret <16 x i16> %shuffle 3120} 3121 3122define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) { 3123; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 3124; AVX1: # 
BB#0: 3125; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3126; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3127; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3128; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3129; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3130; AVX1-NEXT: retq 3131; 3132; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10: 3133; AVX2: # BB#0: 3134; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3135; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3136; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3137; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3138; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3139; AVX2-NEXT: retq 3140 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10> 3141 ret <16 x i16> %shuffle 3142} 3143 3144define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) { 3145; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 3146; AVX1: # BB#0: 3147; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3148; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3149; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3150; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3151; AVX1-NEXT: retq 3152; 3153; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu: 3154; AVX2: # BB#0: 3155; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21] 3156; AVX2-NEXT: retq 3157 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 
undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef> 3158 ret <16 x i16> %shuffle 3159} 3160 3161define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) { 3162; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 3163; AVX1: # BB#0: 3164; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3165; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3166; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3167; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3168; AVX1-NEXT: retq 3169; 3170; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu: 3171; AVX2: # BB#0: 3172; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero 3173; AVX2-NEXT: retq 3174 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef> 3175 ret <16 x i16> %shuffle 3176} 3177 3178define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) { 3179; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 3180; AVX1: # BB#0: 3181; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3182; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3183; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3184; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3185; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11] 3186; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 3187; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1,2,3,4],xmm1[5,6,7] 3188; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3189; AVX1-NEXT: retq 3190; 3191; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: 3192; AVX2: # BB#0: 3193; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15] 3194; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3195; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7] 3196; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3197; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] 3198; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3199; AVX2-NEXT: retq 3200 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26> 3201 ret <16 x i16> %shuffle 3202} 3203 3204define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) { 3205; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 3206; AVX1: # BB#0: 3207; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3208; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3209; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5] 3210; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] 3211; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3212; AVX1-NEXT: retq 3213; 3214; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu: 3215; AVX2: # BB#0: 3216; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21] 3217; AVX2-NEXT: retq 3218 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef> 
3219 ret <16 x i16> %shuffle 3220} 3221 3222define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) { 3223; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 3224; AVX1: # BB#0: 3225; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3226; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3227; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3228; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 3229; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9] 3230; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 3231; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7] 3232; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3233; AVX1-NEXT: retq 3234; 3235; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: 3236; AVX2: # BB#0: 3237; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15] 3238; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3239; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7] 3240; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3241; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9] 3242; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 3243; AVX2-NEXT: retq 3244 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28> 3245 ret <16 x i16> %shuffle 3246} 3247 3248define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) { 3249; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 3250; AVX1: # BB#0: 3251; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3252; AVX1-NEXT: vextractf128 $1, %ymm1, 
%xmm3 3253; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9] 3254; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9] 3255; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3256; AVX1-NEXT: retq 3257; 3258; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu: 3259; AVX2: # BB#0: 3260; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25] 3261; AVX2-NEXT: retq 3262 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef> 3263 ret <16 x i16> %shuffle 3264} 3265 3266define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) { 3267; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 3268; AVX1: # BB#0: 3269; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3270; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 3271; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3272; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4] 3273; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3274; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 3275; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4] 3276; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3277; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3278; AVX1-NEXT: retq 3279; 3280; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: 3281; AVX2: # BB#0: 3282; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15] 3283; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31] 3284; AVX2-NEXT: retq 3285 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef> 3286 ret <16 x i16> %shuffle 3287} 3288 3289define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) { 3290; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19: 3291; AVX1: # BB#0: 3292; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3293; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 3294; AVX1-NEXT: retq 3295; 3296; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19: 3297; AVX2: # BB#0: 3298; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3299; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 3300; AVX2-NEXT: retq 3301 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19> 3302 ret <16 x i16> %shuffle 3303} 3304 3305define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) { 3306; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 3307; AVX1: # BB#0: 3308; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 3309; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3310; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 3311; AVX1-NEXT: retq 3312; 3313; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: 3314; AVX2: # BB#0: 3315; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 3316; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3317; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 3318; AVX2-NEXT: retq 3319 %shuffle = shufflevector <16 x i16> 
%a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 3320 ret <16 x i16> %shuffle 3321} 3322 3323define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) { 3324; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: 3325; AVX1: # BB#0: 3326; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3327; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3328; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3329; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 3330; AVX1-NEXT: retq 3331; 3332; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: 3333; AVX2: # BB#0: 3334; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 3335; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 3336; AVX2-NEXT: retq 3337 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 3338 ret <16 x i16> %shuffle 3339} 3340 3341define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { 3342; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u: 3343; ALL: # BB#0: 3344; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3345; ALL-NEXT: retq 3346 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 3347 ret <16 x i16> %shuffle 3348} 3349 3350define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { 3351; ALL-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u: 3352; ALL: # BB#0: 3353; ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7] 3354; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3355; ALL-NEXT: retq 3356 %shuffle = 
shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 3357 ret <16 x i16> %shuffle 3358} 3359 3360define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { 3361; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 3362; AVX1: # BB#0: 3363; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3364; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 3365; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3366; AVX1-NEXT: retq 3367; 3368; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: 3369; AVX2: # BB#0: 3370; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 3371; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] 3372; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3373; AVX2-NEXT: retq 3374 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 3375 ret <16 x i16> %shuffle 3376} 3377 3378define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) { 3379; ALL-LABEL: insert_v16i16_0elt_into_zero_vector: 3380; ALL: # BB#0: 3381; ALL-NEXT: movzwl (%rdi), %eax 3382; ALL-NEXT: vmovd %eax, %xmm0 3383; ALL-NEXT: retq 3384 %val = load i16, i16* %ptr 3385 %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0 3386 ret <16 x i16> %i0 3387} 3388 3389define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) { 3390; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31: 3391; AVX1: # BB#0: 3392; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 3393; AVX1-NEXT: retq 3394; 3395; AVX2-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31: 3396; AVX2: # BB#0: 3397; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 3398; AVX2-NEXT: 
retq 3399 %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3400 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3401 %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3402 ret <16 x i16> %shuf 3403} 3404 3405define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) { 3406; ALL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc: 3407; ALL: # BB#0: 3408; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 3409; ALL-NEXT: retq 3410 %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3411 %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 3412 %bc0hi = bitcast <8 x i16> %ahi to <16 x i8> 3413 %bc1hi = bitcast <8 x i16> %bhi to <16 x i8> 3414 %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 3415 %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16> 3416 ret <16 x i16> %shuffle16 3417} 3418 3419define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) { 3420; AVX1-LABEL: PR24935: 3421; AVX1: # BB#0: 3422; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1] 3423; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3424; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1] 3425; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5,6,7] 3426; AVX1-NEXT: vextractf128 $1, %ymm0, 
%xmm4 3427; AVX1-NEXT: vpshufhw {{.*#+}} xmm5 = xmm4[0,1,2,3,5,5,6,7] 3428; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm0[2,3,2,3,4,5,6,7,8,9,8,9,0,1,2,3] 3429; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1],xmm6[2,3],xmm5[4],xmm6[5,6,7] 3430; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3],xmm5[4,5,6],xmm2[7] 3431; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 3432; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7] 3433; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7] 3434; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,4,5,4,5,10,11,4,5,14,15,12,13,0,1] 3435; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3,4,5],xmm0[6],xmm1[7] 3436; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3437; AVX1-NEXT: retq 3438; 3439; AVX2-LABEL: PR24935: 3440; AVX2: # BB#0: 3441; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 3442; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[6,7,4,5,0,1,10,11,4,5,10,11,4,5,6,7,22,23,20,21,16,17,26,27,20,21,26,27,20,21,22,23] 3443; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9,10,11,4,5,8,9,0,1,14,15,12,13,0,1,24,25,26,27,20,21,24,25,16,17,30,31,28,29,16,17] 3444; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,u,u,255,255,255,255,0,0,u,u,0,0,u,u,u,u,255,255,0,0,u,u,u,u,u,u,0,0> 3445; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1 3446; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 3447; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u] 3448; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15] 3449; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,6,7,8,9,10,11,13,13,14,15] 3450; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1,2],ymm2[3],ymm0[4],ymm2[5,6,7,8],ymm0[9,10],ymm2[11],ymm0[12],ymm2[13,14,15] 3451; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255] 3452; 
AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 3453; AVX2-NEXT: retq 3454 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24> 3455 ret <16 x i16> %shuffle 3456} 3457 3458define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) { 3459; AVX1-LABEL: insert_dup_mem_v16i16_i32: 3460; AVX1: # BB#0: 3461; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3462; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3463; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3464; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 3465; AVX1-NEXT: retq 3466; 3467; AVX2-LABEL: insert_dup_mem_v16i16_i32: 3468; AVX2: # BB#0: 3469; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0 3470; AVX2-NEXT: retq 3471 %tmp = load i32, i32* %ptr, align 4 3472 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0 3473 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16> 3474 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer 3475 ret <16 x i16> %tmp3 3476} 3477 3478define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) { 3479; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16: 3480; AVX1: # BB#0: 3481; AVX1-NEXT: movswl (%rdi), %eax 3482; AVX1-NEXT: vmovd %eax, %xmm0 3483; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 3484; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 3485; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 3486; AVX1-NEXT: retq 3487; 3488; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16: 3489; AVX2: # BB#0: 3490; AVX2-NEXT: movswl (%rdi), %eax 3491; AVX2-NEXT: vmovd %eax, %xmm0 3492; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 3493; AVX2-NEXT: retq 3494 %tmp = load i16, i16* %ptr, align 2 3495 %tmp1 = sext i16 %tmp to i32 3496 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0 3497 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16> 3498 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> 
zeroinitializer
  ret <16 x i16> %tmp4
}

; Splat of i16 element 1 taken from a loaded i32.
; The i32 at %ptr is inserted into lane 0 of a zero <4 x i32>, bitcast to
; <8 x i16>, and element 1 of that vector is replicated into all 16 lanes.
; With avx2 the expected code is one vpbroadcastw reading from 2(%rdi); with
; plain avx it is vmovd, vpshuflw, vpshufd and a vinsertf128 of the low half.
; NOTE(review): attribute group #0 is defined outside this section - confirm
; it exists at the end of the file.
define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; AVX2-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %tmp3
}

; Splat of i16 element 3 taken from a loaded i32.
; Here the i32 is inserted into lane 1 (not lane 0) of the zero <4 x i32>, so
; after the bitcast to <8 x i16> the splatted element 3 is the second i16 of
; the loaded value. With avx2 the expected code is again a single vpbroadcastw
; reading from 2(%rdi); with plain avx it is vmovd, a vpshufb that repeats
; bytes 2,3 of the loaded register, and a vinsertf128 of the low half.
; NOTE(review): attribute group #0 is defined outside this section - confirm
; it exists at the end of the file.
define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; AVX2-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
  %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i16> %tmp3
}