; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Splat case - every result byte is byte 0 of %a.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
; FIXME: SSE2 should look like the following:
; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; FIXME: # BB#0:
; FIXME-NEXT: punpcklbw %xmm0, %xmm0
; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; FIXME-NEXT: retq
;
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i8> %shuffle
}

; Low 8 result bytes are byte 0 of %a, high 8 result bytes are byte 1 of %a.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %shuffle
}

; Low 8 result bytes are byte 0 of %a, high 8 result bytes are byte 8 of %a.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,2,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i8> %shuffle
}

; Each of bytes 0-3 of %a repeated four times.
define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
; SSE: # BB#0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i8> %shuffle
}

; Each of bytes 4-7 of %a repeated four times.
define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
; SSE: # BB#0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
  ret <16 x i8> %shuffle
}

; Bytes 0, 4, 8 and 12 of %a, each repeated four times.
define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSE2: # BB#0:
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i8> %shuffle
}

; Each of bytes 0-7 of %a repeated twice.
define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
; SSE: # BB#0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  ret <16 x i8> %shuffle
}

; Bytes 0 and 1 of %a (one byte pair) repeated eight times.
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; FIXME: SSE2 should be the following:
; FIXME-LABEL: @shuffle_v16i8_0101010101010101
; FIXME: # BB#0:
; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; FIXME-NEXT: retq
;
; SSE2-LABEL: shuffle_v16i8_0101010101010101:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_0101010101010101:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_0101010101010101:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v16i8_0101010101010101:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i8_0101010101010101:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

; Interleave the low 8 bytes of %a with the low 8 bytes of %b.
define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
; SSE: # BB#0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %shuffle
}

; Interleave the high 8 bytes of %a with the high 8 bytes of %b.
define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
; SSE: # BB#0:
; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i8> %shuffle
}

; Byte 0 of %b in every even lane, bytes 0-7 of %a in the odd lanes.
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSE41: # BB#0:
; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; AVX1: # BB#0:
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
  ret <16 x i8> %shuffle
}

; Reverse the byte order inside each 4-byte group of %a.
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %shuffle
}

; Byte-reversed low two 4-byte groups of %a followed by those of %b.
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSSE3: # BB#0:
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE41: # BB#0:
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %shuffle
}

; Byte-reversed 4-byte groups taken alternately from %a and %b.
define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: packuswb %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %shuffle
}

; In-place blend - even bytes from %a, odd bytes from %b.
define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; SSE2: # BB#0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; In-place blend - bytes 3, 7, 11 and 15 from %b, the rest from %a.
define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; SSE2: # BB#0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; Second shuffle operand is zeroinitializer, so bytes 3, 7, 11 and 15 are
; zeroed and the remaining bytes are kept from %a.
define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; In-place blend - bytes 4, 7, 12 and 15 from %b, the rest from %a.
define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSE2: # BB#0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andnps %xmm1, %xmm2
; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; In-place blend - bytes 0-3, 8, 9, 12 and 14 from %b, the rest from %a.
define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; SSE2: # BB#0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: andnps %xmm0, %xmm2
; SSE2-NEXT: orps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; SSE41-NEXT: pblendvb %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %shuffle
}

; Bytes 0, 4, 8 and 12 of %a packed into result bytes 0-3; the remaining
; result lanes are undef.
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
; SSE2-LABEL: trunc_v4i32_shuffle:
; SSE2: # BB#0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_v4i32_shuffle:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_v4i32_shuffle:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_v4i32_shuffle:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Long chain of dependent two-input shuffles with irregular masks.
define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
; We don't have anything useful to check here. This generates 100s of
; instructions. Instead, just make sure we survived codegen.
; ALL-LABEL: stress_test0:
; ALL: retq
entry:
  %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
  %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
  %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
  %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
  %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
  %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
  %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
  %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10>
  ret <16 x i8> %s.16.0
}

; Shuffle chain with many undef mask elements and undef operands.
define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
; There is nothing interesting to check about these instructions other than
; that they survive codegen. However, we actually do better and delete all of
; them because the result is 'undef'.
;
; ALL-LABEL: undef_test1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
  %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
  %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9>
  %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11>
  %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
  %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef>
  %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
  %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef>
  %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5>
  %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef>

  ret <16 x i8> %s.12.4
}

define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2-LABEL: PR20540:
; SSE2: # BB#0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR20540:
; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR20540:
; SSE41: # BB#0:
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: PR20540:
; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32
8, i32 8, i32 8, i32 8, i32 8> 648 ret <16 x i8> %shuffle 649} 650 651define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 652; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 653; SSE: # BB#0: 654; SSE-NEXT: movzbl %dil, %eax 655; SSE-NEXT: movd %eax, %xmm0 656; SSE-NEXT: retq 657; 658; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 659; AVX: # BB#0: 660; AVX-NEXT: movzbl %dil, %eax 661; AVX-NEXT: vmovd %eax, %xmm0 662; AVX-NEXT: retq 663 %a = insertelement <16 x i8> undef, i8 %i, i32 0 664 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 665 ret <16 x i8> %shuffle 666} 667 668define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 669; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 670; SSE2: # BB#0: 671; SSE2-NEXT: shll $8, %edi 672; SSE2-NEXT: pxor %xmm0, %xmm0 673; SSE2-NEXT: pinsrw $2, %edi, %xmm0 674; SSE2-NEXT: retq 675; 676; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 677; SSSE3: # BB#0: 678; SSSE3-NEXT: shll $8, %edi 679; SSSE3-NEXT: pxor %xmm0, %xmm0 680; SSSE3-NEXT: pinsrw $2, %edi, %xmm0 681; SSSE3-NEXT: retq 682; 683; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 684; SSE41: # BB#0: 685; SSE41-NEXT: pxor %xmm0, %xmm0 686; SSE41-NEXT: pinsrb $5, %edi, %xmm0 687; SSE41-NEXT: retq 688; 689; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 690; AVX: # BB#0: 691; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 692; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 693; AVX-NEXT: retq 694 %a = insertelement <16 x i8> undef, i8 %i, i32 0 695 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0> 696 ret <16 x i8> %shuffle 697} 698 699define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { 700; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 701; SSE2: # BB#0: 702; SSE2-NEXT: shll $8, %edi 703; SSE2-NEXT: pxor %xmm0, %xmm0 704; SSE2-NEXT: pinsrw $7, %edi, %xmm0 705; SSE2-NEXT: retq 706; 707; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 708; SSSE3: # BB#0: 709; SSSE3-NEXT: shll $8, %edi 710; SSSE3-NEXT: pxor %xmm0, %xmm0 711; SSSE3-NEXT: pinsrw $7, %edi, %xmm0 712; SSSE3-NEXT: retq 713; 714; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 715; SSE41: # BB#0: 716; SSE41-NEXT: pxor %xmm0, %xmm0 717; SSE41-NEXT: pinsrb $15, %edi, %xmm0 718; SSE41-NEXT: retq 719; 720; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 721; AVX: # BB#0: 722; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 723; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 724; AVX-NEXT: retq 725 %a = insertelement <16 x i8> undef, i8 %i, i32 0 726 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16> 727 ret <16 x i8> %shuffle 728} 729 730define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 731; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 732; SSE2: # BB#0: 733; SSE2-NEXT: movzbl %dil, %eax 734; SSE2-NEXT: pxor %xmm0, %xmm0 735; SSE2-NEXT: pinsrw $1, %eax, %xmm0 736; SSE2-NEXT: retq 737; 738; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 739; SSSE3: # BB#0: 740; SSSE3-NEXT: movzbl %dil, %eax 741; SSSE3-NEXT: pxor %xmm0, %xmm0 742; SSSE3-NEXT: pinsrw $1, %eax, %xmm0 743; SSSE3-NEXT: retq 744; 745; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 746; SSE41: # BB#0: 747; 
SSE41-NEXT: pxor %xmm0, %xmm0 748; SSE41-NEXT: pinsrb $2, %edi, %xmm0 749; SSE41-NEXT: retq 750; 751; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 752; AVX: # BB#0: 753; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 754; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 755; AVX-NEXT: retq 756 %a = insertelement <16 x i8> undef, i8 %i, i32 3 757 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 758 ret <16 x i8> %shuffle 759} 760 761define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) { 762; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 763; SSE: # BB#0: 764; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 765; SSE-NEXT: retq 766; 767; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 768; AVX: # BB#0: 769; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 770; AVX-NEXT: retq 771 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef> 772 ret <16 x i8> %shuffle 773} 774 775define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 776; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 777; SSE: # BB#0: 778; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 779; SSE-NEXT: retq 780; 781; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 782; AVX: # BB#0: 783; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 784; AVX-NEXT: retq 785 %shuffle = shufflevector <16 x 
i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 786 ret <16 x i8> %shuffle 787} 788 789define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 790; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 791; SSE2: # BB#0: 792; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 793; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 794; SSE2-NEXT: por %xmm1, %xmm0 795; SSE2-NEXT: retq 796; 797; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 798; SSSE3: # BB#0: 799; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 800; SSSE3-NEXT: retq 801; 802; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 803; SSE41: # BB#0: 804; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 805; SSE41-NEXT: retq 806; 807; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 808; AVX: # BB#0: 809; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 810; AVX-NEXT: retq 811 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 812 ret <16 x i8> %shuffle 813} 814 815define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 816; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 817; SSE2: # BB#0: 818; SSE2-NEXT: movdqa %xmm0, %xmm1 819; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 820; SSE2-NEXT: pslldq {{.*#+}} xmm0 = 
zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 821; SSE2-NEXT: por %xmm1, %xmm0 822; SSE2-NEXT: retq 823; 824; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 825; SSSE3: # BB#0: 826; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 827; SSSE3-NEXT: retq 828; 829; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 830; SSE41: # BB#0: 831; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 832; SSE41-NEXT: retq 833; 834; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 835; AVX: # BB#0: 836; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 837; AVX-NEXT: retq 838 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 839 ret <16 x i8> %shuffle 840} 841 842define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) { 843; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 844; SSE2: # BB#0: 845; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 846; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 847; SSE2-NEXT: por %xmm1, %xmm0 848; SSE2-NEXT: retq 849; 850; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 851; SSSE3: # BB#0: 852; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 853; SSSE3-NEXT: retq 854; 855; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 856; SSE41: # BB#0: 857; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 858; SSE41-NEXT: retq 859; 860; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 861; AVX: # BB#0: 862; AVX-NEXT: vpalignr 
{{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 863; AVX-NEXT: retq 864 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 865 ret <16 x i8> %shuffle 866} 867 868define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) { 869; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 870; SSE2: # BB#0: 871; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 872; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] 873; SSE2-NEXT: por %xmm1, %xmm0 874; SSE2-NEXT: retq 875; 876; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 877; SSSE3: # BB#0: 878; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 879; SSSE3-NEXT: movdqa %xmm1, %xmm0 880; SSSE3-NEXT: retq 881; 882; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 883; SSE41: # BB#0: 884; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 885; SSE41-NEXT: movdqa %xmm1, %xmm0 886; SSE41-NEXT: retq 887; 888; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 889; AVX: # BB#0: 890; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 891; AVX-NEXT: retq 892 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 893 ret <16 x i8> %shuffle 894} 895 896define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) { 897; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 898; SSE2: # BB#0: 899; SSE2-NEXT: movdqa %xmm0, %xmm1 900; 
SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 901; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 902; SSE2-NEXT: por %xmm1, %xmm0 903; SSE2-NEXT: retq 904; 905; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 906; SSSE3: # BB#0: 907; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 908; SSSE3-NEXT: retq 909; 910; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 911; SSE41: # BB#0: 912; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 913; SSE41-NEXT: retq 914; 915; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 916; AVX: # BB#0: 917; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 918; AVX-NEXT: retq 919 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0> 920 ret <16 x i8> %shuffle 921} 922 923define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) { 924; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 925; SSE2: # BB#0: 926; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 927; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 928; SSE2-NEXT: por %xmm1, %xmm0 929; SSE2-NEXT: retq 930; 931; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 932; SSSE3: # BB#0: 933; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 934; SSSE3-NEXT: movdqa %xmm1, %xmm0 935; SSSE3-NEXT: retq 936; 937; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 938; SSE41: # BB#0: 939; SSE41-NEXT: palignr {{.*#+}} xmm1 = 
xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 940; SSE41-NEXT: movdqa %xmm1, %xmm0 941; SSE41-NEXT: retq 942; 943; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 944; AVX: # BB#0: 945; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 946; AVX-NEXT: retq 947 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 948 ret <16 x i8> %shuffle 949} 950 951define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) { 952; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 953; SSE2: # BB#0: 954; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 955; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 956; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1] 957; SSE2-NEXT: retq 958; 959; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 960; SSSE3: # BB#0: 961; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 962; SSSE3-NEXT: retq 963; 964; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 965; SSE41: # BB#0: 966; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 967; SSE41-NEXT: retq 968; 969; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 970; AVX: # BB#0: 971; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 972; AVX-NEXT: retq 973 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 974 ret <16 x i8> 
%shuffle 975} 976 977define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 978; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 979; SSE2: # BB#0: 980; SSE2-NEXT: pxor %xmm1, %xmm1 981; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 982; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 983; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 984; SSE2-NEXT: retq 985; 986; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 987; SSSE3: # BB#0: 988; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 989; SSSE3-NEXT: retq 990; 991; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 992; SSE41: # BB#0: 993; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 994; SSE41-NEXT: retq 995; 996; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 997; AVX: # BB#0: 998; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 999; AVX-NEXT: retq 1000 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 1001 ret <16 x i8> %shuffle 1002} 1003 1004define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) { 1005; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1006; SSE2: # BB#0: 1007; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1008; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1009; SSE2-NEXT: retq 
1010; 1011; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1012; SSSE3: # BB#0: 1013; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1014; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1015; SSSE3-NEXT: retq 1016; 1017; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1018; SSE41: # BB#0: 1019; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1020; SSE41-NEXT: retq 1021; 1022; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 1023; AVX: # BB#0: 1024; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1025; AVX-NEXT: retq 1026 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef> 1027 ret <16 x i8> %shuffle 1028} 1029 1030define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) { 1031; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1032; SSE2: # BB#0: 1033; SSE2-NEXT: pxor %xmm1, %xmm1 1034; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1035; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1036; SSE2-NEXT: retq 1037; 1038; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1039; SSSE3: # BB#0: 1040; SSSE3-NEXT: pxor %xmm1, %xmm1 1041; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1042; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1043; SSSE3-NEXT: retq 1044; 1045; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1046; SSE41: # BB#0: 1047; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1048; SSE41-NEXT: retq 1049; 1050; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1051; AVX: # BB#0: 1052; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1053; AVX-NEXT: retq 1054 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31> 1055 ret <16 x i8> %shuffle 1056} 1057 1058define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) { 1059; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1060; SSE2: # BB#0: 1061; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1062; SSE2-NEXT: retq 1063; 1064; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1065; SSSE3: # BB#0: 1066; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1067; SSSE3-NEXT: retq 1068; 1069; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1070; SSE41: # BB#0: 1071; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1072; SSE41-NEXT: retq 1073; 1074; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1075; AVX: # BB#0: 1076; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1077; AVX-NEXT: retq 1078 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, 
<16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef> 1079 ret <16 x i8> %shuffle 1080} 1081 1082define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) { 1083; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1084; SSE2: # BB#0: 1085; SSE2-NEXT: pxor %xmm1, %xmm1 1086; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1087; SSE2-NEXT: retq 1088; 1089; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1090; SSSE3: # BB#0: 1091; SSSE3-NEXT: pxor %xmm1, %xmm1 1092; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1093; SSSE3-NEXT: retq 1094; 1095; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1096; SSE41: # BB#0: 1097; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1098; SSE41-NEXT: retq 1099; 1100; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1101; AVX: # BB#0: 1102; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1103; AVX-NEXT: retq 1104 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31> 1105 ret <16 x i8> %shuffle 1106} 1107 1108define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) { 1109; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1110; SSE2: # BB#0: # %entry 1111; SSE2-NEXT: 
pxor %xmm2, %xmm2 1112; SSE2-NEXT: movdqa %xmm0, %xmm3 1113; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 1114; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,3,0,1] 1115; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,2,2,4,5,6,7] 1116; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,7,7] 1117; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535] 1118; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1119; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3] 1120; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,3,4,5,6,7] 1121; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,4] 1122; SSE2-NEXT: pand %xmm5, %xmm2 1123; SSE2-NEXT: pandn %xmm4, %xmm5 1124; SSE2-NEXT: por %xmm2, %xmm5 1125; SSE2-NEXT: psrlq $16, %xmm3 1126; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] 1127; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3] 1128; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1129; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4] 1130; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 1131; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1132; SSE2-NEXT: packuswb %xmm5, %xmm2 1133; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255] 1134; SSE2-NEXT: pand %xmm0, %xmm2 1135; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3] 1136; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,3,4,5,6,7] 1137; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,7] 1138; SSE2-NEXT: pandn %xmm1, %xmm0 1139; SSE2-NEXT: por %xmm2, %xmm0 1140; SSE2-NEXT: retq 1141; 1142; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1143; SSSE3: # BB#0: # %entry 1144; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = 
xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1145; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1146; SSSE3-NEXT: por %xmm1, %xmm0 1147; SSSE3-NEXT: retq 1148; 1149; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1150; SSE41: # BB#0: # %entry 1151; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1152; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1153; SSE41-NEXT: por %xmm1, %xmm0 1154; SSE41-NEXT: retq 1155; 1156; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1157; AVX: # BB#0: # %entry 1158; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1159; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1160; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 1161; AVX-NEXT: retq 1162entry: 1163 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0> 1164 1165 ret <16 x i8> %shuffle 1166} 1167 1168define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { 1169; Nothing interesting to test here. Just make sure we didn't crash. 
; ALL-LABEL: stress_test2:
; ALL: retq
entry:
  %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
  %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
  %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>

  ret <16 x i8> %s.2.0
}

; Shuffles a constant whose last four bytes are zero (the other twelve are
; undef) with a bitcast zero vector: the mask takes elements 12-15 of the
; constant followed by elements 0-11 of the zero vector. The checks expect the
; result and the explicit zeroinitializer to be stored from one zeroed register.
define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
; SSE-LABEL: constant_gets_selected:
; SSE: # BB#0: # %entry
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, (%rdi)
; SSE-NEXT: movaps %xmm0, (%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: constant_gets_selected:
; AVX: # BB#0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %xmm0, (%rdi)
; AVX-NEXT: vmovaps %xmm0, (%rsi)
; AVX-NEXT: retq
entry:
  %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
  %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
  %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
  store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16
  store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16
  ret void
}

;
; Shuffle to logical bit shifts
;

; In the tests of this group the second shuffle operand is zeroinitializer, so
; mask index 16 denotes a zero byte ("zz" in the function names) and undef mask
; entries are spelled "uu".

; Each odd result byte takes the preceding even byte of %a and each even result
; byte is zero. The checks expect a single 16-bit left shift by 8.
define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; SSE: # BB#0:
; SSE-NEXT: psllw $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; AVX: # BB#0:
; AVX-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
  ret <16 x i8> %shuffle
}

; Each group of four result bytes is three zero bytes followed by the first
; byte of the matching group of %a. The checks expect a single 32-bit left
; shift by 24.
define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; SSE: # BB#0:
; SSE-NEXT: pslld $24, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; AVX: # BB#0:
; AVX-NEXT: vpslld $24, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
  ret <16 x i8> %shuffle
}

; Each 8-byte half of the result is seven zero bytes followed by the first byte
; of the matching half of %a. The checks expect a single 64-bit left shift by
; 56.
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
; SSE: # BB#0:
; SSE-NEXT: psllq $56, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
  ret <16 x i8> %shuffle
}

; Within each 8-byte half the defined result bytes take the preceding byte of
; %a, the first byte of each half is zero, and three lanes are undef. The
; checks expect the undef lanes to be filled so that a single 64-bit left shift
; by 8 suffices.
define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
; SSE: # BB#0:
; SSE-NEXT: psllq $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

; The defined even result bytes take the following odd byte of %a, the defined
; odd result bytes are zero, and the remaining lanes are undef. The checks
; expect a single 16-bit logical right shift by 8.
define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
; SSE: # BB#0:
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
  ret <16 x i8> %shuffle
}

; In the defined 4-byte groups the low two result bytes take the high two bytes
; of the matching group of %a and the high two result bytes are zero; the
; middle lanes are undef. The checks expect a single 32-bit logical right shift
; by 16.
define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
  ret <16 x i8> %shuffle
}

; The first byte of each 8-byte half of the result takes the last byte of the
; matching half of %a; every other defined byte is zero and the rest are undef.
; The checks expect a single 64-bit logical right shift by 56.
define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: psrlq $56, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
  ret <16 x i8> %shuffle
}

; Selects the even-indexed bytes of %inval1 followed by the even-indexed bytes
; of %inval2. The SSE2 checks expect a mask-and-pack sequence (pand with a
; constant, then packuswb); the other configurations expect two pshufb with a
; shared constant mask joined by punpcklqdq.
; NOTE(review): presumably a regression test for bug PR12412 - confirm against
; the bug tracker.
define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
; SSE2-LABEL: PR12412:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR12412:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR12412:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: PR12412:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
entry:
  %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ret <16 x i8> %0
}

; In each 4-byte group the first result byte is undef, the next two take bytes
; 2 and 3 of the matching group of %a, and the last is a zero byte taken from
; the zeroinitializer operand. The checks expect a single 32-bit logical right
; shift by 8.
define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
; SSE: # BB#0:
; SSE-NEXT: psrld $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
  ret <16 x i8> %shuffle
}

; Chains three shuffles at different element widths (bytes, then <4 x float>,
; then <8 x i16>) through bitcasts. The checks expect the whole chain to
; collapse into a single byte interleave of the low halves of %a and %b.
define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
; SSE: # BB#0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
  %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
  ret <16 x i8> %bitcast8
}

; Loads an i32, inserts it into element 0 of a zero <4 x i32>, bitcasts to
; bytes and splats byte 0. With AVX2 the checks expect the load to fold into a
; single vpbroadcastb from memory; the other configurations load with movd and
; then splat in registers.
define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_mem_v16i8_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v16i8_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v16i8_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v16i8_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp3
}

; Loads an i8, sign-extends it to i32, inserts it into element 0 of a zero
; <4 x i32>, bitcasts to bytes and splats byte 0. With AVX2 the checks expect a
; single vpbroadcastb from memory; the other configurations load through
; movsbl/movd and then splat in registers.
define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE2: # BB#0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
; AVX1: # BB#0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp4
}

; Same construction as the i32-load splat of byte 0, but splats byte 1 of the
; loaded i32. With AVX2 the checks expect a vpbroadcastb from memory at offset
; 1.
define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i8_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %tmp3
}

; Same construction as the i32-load splat of byte 0, but splats byte 2 of the
; loaded i32. With AVX2 the checks expect a vpbroadcastb from memory at offset
; 2.
define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <16 x i8> %tmp3
}

; Loads an i8, sign-extends it to i32, inserts it into a zero <4 x i32> and
; splats byte 1 of the result. Unlike the byte-0 variant, the AVX2 checks here
; expect the value to pass through a general-purpose register (movsbl, shrl by
; 8, vmovd) before a register-source vpbroadcastb.
define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; SSE2: # BB#0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; AVX1: # BB#0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; AVX2: # BB#0:
; AVX2-NEXT: movsbl (%rdi), %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %tmp4
}

; Loads an i8, sign-extends it to i32, inserts it into a zero <4 x i32> and
; splats byte 2 of the result. The AVX2 checks expect the value to pass through
; a general-purpose register (movsbl, shrl by 16, vmovd) before a
; register-source vpbroadcastb.
define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSE2: # BB#0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX1: # BB#0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX2: # BB#0:
; AVX2-NEXT: movsbl (%rdi), %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <16 x i8> %tmp4
}