; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Codegen tests for <16 x i8> shufflevector lowering on x86-64. Each function
; is compiled once per invocation above (baseline, +ssse3, +sse4.1, +avx,
; +avx2) and the emitted assembly is matched against the check lines that
; follow its 'define'. Function names spell out the shuffle mask: indices
; 00-15 select from the first operand, 16-31 from the second, and 'zz' marks
; a lane taken from a zero vector.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Broadcast byte 0 of %a to all sixteen result bytes.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE2:       # BB#0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    pshufb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pshufb %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i8> %shuffle
}

; Low eight bytes repeat byte 0 of %a; high eight bytes repeat byte 1.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE2:       # BB#0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %shuffle
}

; Low eight bytes repeat byte 0 of %a; high eight bytes repeat byte 8.
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i8> %shuffle
}

; Each of bytes 0-3 of %a is repeated four times.
define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i8> %shuffle
}

; Each of bytes 4-7 of %a is repeated four times.
define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
  ret <16 x i8> %shuffle
}

; Bytes 0, 4, 8 and 12 of %a are each repeated four times.
define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
  ret <16 x i8> %shuffle
}

; Each of bytes 0-7 of %a is duplicated in place.
define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  ret <16 x i8> %shuffle
}

; The byte pair (0, 1) of %a is repeated eight times.
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_0101010101010101:
; SSE:       # BB#0:
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v16i8_0101010101010101:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i8_0101010101010101:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

; Interleave the low eight bytes of %a with the low eight bytes of %b.
define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %shuffle
}

; Interleave the high eight bytes of %a with the high eight bytes of %b.
define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i8> %shuffle
}

; Byte 0 of %b in every even lane, bytes 0-7 of %a in the odd lanes.
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSE2:       # BB#0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; SSE41:       # BB#0:
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
  ret <16 x i8> %shuffle
}

; Reverse the byte order inside each 4-byte group of %a.
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSE2:       # BB#0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    packuswb %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  ret <16 x i8> %shuffle
}

; Byte-reversed 4-byte groups 0 and 1 of %a, followed by byte-reversed
; 4-byte groups 0 and 1 of %b.
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE2:       # BB#0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE41:       # BB#0:
; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %shuffle
}

; Byte-reversed 4-byte groups drawn alternately from %a (groups 0 and 2) and
; from %b (groups 3 and 1).
define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE2:       # BB#0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    packuswb %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
  ret <16 x i8> %shuffle
}

; In-place blend: even bytes from %a, odd bytes from %b.
define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; SSE2:       # BB#0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE41-NEXT:    pblendvb %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; In-place blend: bytes 3, 7, 11 and 15 from %b, all other bytes from %a.
define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; SSE2:       # BB#0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; SSE41-NEXT:    pblendvb %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; Bytes 3, 7, 11 and 15 are taken from a zeroinitializer operand; all other
; bytes are %a unchanged.
define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; In-place blend: bytes 4, 7, 12 and 15 from %b, all other bytes from %a.
define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSE2:       # BB#0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; SSE41-NEXT:    pblendvb %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
  ret <16 x i8> %shuffle
}

; In-place blend: bytes 0-3, 8, 9, 12 and 14 from %b, all other bytes from %a.
define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; SSE2:       # BB#0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; SSE41-NEXT:    pblendvb %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %shuffle
}

; Gathers bytes 0, 4, 8 and 12 of %a into the low four result bytes; the
; remaining twelve mask elements are undef.
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
; SSE2-LABEL: trunc_v4i32_shuffle:
; SSE2:       # BB#0:
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc_v4i32_shuffle:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc_v4i32_shuffle:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4i32_shuffle:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
; We don't have anything useful to check here. This generates 100s of
; instructions. Instead, just make sure we survived codegen.
; ALL-LABEL: stress_test0:
; ALL:         retq
entry:
  %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
  %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
  %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
  %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
  %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
  %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
  %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
  %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10>
  ret <16 x i8> %s.16.0
}

define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
; There is nothing interesting to check about these instructions other than
; that they survive codegen. However, we actually do better and delete all of
; them because the result is 'undef'.
;
; ALL-LABEL: undef_test1:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    retq
entry:
  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
  %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
  %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9>
  %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11>
  %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
  %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef>
  %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
  %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef>
  %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5>
  %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef>

  ret <16 x i8> %s.12.4
}

; Widens <8 x i8> %a to sixteen bytes: result bytes 0-7 are %a, and bytes
; 8-15 all select element 0 of the zeroinitializer operand.
define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2-LABEL: PR20540:
; SSE2:       # BB#0:
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: PR20540:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: PR20540:
; SSE41:       # BB#0:
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR20540:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i8> %shuffle
}

; Scalar %i is inserted into lane 0 of an otherwise-undef vector, then that
; lane becomes result byte 0 while bytes 1-15 come from zeroinitializer.
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE:       # BB#0:
; SSE-NEXT:    movzbl %dil, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX:       # BB#0:
; AVX-NEXT:    movzbl %dil, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %a = insertelement <16 x i8> undef, i8 %i, i32 0
  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle
}

define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2:       # BB#0:
; SSE2-NEXT:    shll $8, %edi
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pinsrw $2, %edi, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    shll $8, %edi
; SSSE3-NEXT:    pxor %xmm0, %xmm0
; SSSE3-NEXT:    pinsrw $2, %edi, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pinsrb $5, %edi, %xmm0
; SSE41-NEXT:    retq
;
661; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 662; AVX: # BB#0: 663; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 664; AVX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 665; AVX-NEXT: retq 666 %a = insertelement <16 x i8> undef, i8 %i, i32 0 667 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 668 ret <16 x i8> %shuffle 669} 670 671define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) { 672; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 673; SSE2: # BB#0: 674; SSE2-NEXT: shll $8, %edi 675; SSE2-NEXT: pxor %xmm0, %xmm0 676; SSE2-NEXT: pinsrw $7, %edi, %xmm0 677; SSE2-NEXT: retq 678; 679; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 680; SSSE3: # BB#0: 681; SSSE3-NEXT: shll $8, %edi 682; SSSE3-NEXT: pxor %xmm0, %xmm0 683; SSSE3-NEXT: pinsrw $7, %edi, %xmm0 684; SSSE3-NEXT: retq 685; 686; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 687; SSE41: # BB#0: 688; SSE41-NEXT: pxor %xmm0, %xmm0 689; SSE41-NEXT: pinsrb $15, %edi, %xmm0 690; SSE41-NEXT: retq 691; 692; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: 693; AVX: # BB#0: 694; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 695; AVX-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 696; AVX-NEXT: retq 697 %a = insertelement <16 x i8> undef, i8 %i, i32 0 698 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16> 699 ret <16 x i8> %shuffle 700} 701 702define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) { 703; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 704; SSE2: # BB#0: 705; SSE2-NEXT: movzbl %dil, %eax 706; SSE2-NEXT: 
pxor %xmm0, %xmm0 707; SSE2-NEXT: pinsrw $1, %eax, %xmm0 708; SSE2-NEXT: retq 709; 710; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 711; SSSE3: # BB#0: 712; SSSE3-NEXT: movzbl %dil, %eax 713; SSSE3-NEXT: pxor %xmm0, %xmm0 714; SSSE3-NEXT: pinsrw $1, %eax, %xmm0 715; SSSE3-NEXT: retq 716; 717; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 718; SSE41: # BB#0: 719; SSE41-NEXT: pxor %xmm0, %xmm0 720; SSE41-NEXT: pinsrb $2, %edi, %xmm0 721; SSE41-NEXT: retq 722; 723; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 724; AVX: # BB#0: 725; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 726; AVX-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 727; AVX-NEXT: retq 728 %a = insertelement <16 x i8> undef, i8 %i, i32 3 729 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 730 ret <16 x i8> %shuffle 731} 732 733define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) { 734; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 735; SSE: # BB#0: 736; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 737; SSE-NEXT: retq 738; 739; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu: 740; AVX: # BB#0: 741; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] 742; AVX-NEXT: retq 743 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef> 744 ret <16 x i8> %shuffle 745} 746 747define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 748; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 
749; SSE: # BB#0: 750; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 751; SSE-NEXT: retq 752; 753; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: 754; AVX: # BB#0: 755; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 756; AVX-NEXT: retq 757 %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 758 ret <16 x i8> %shuffle 759} 760 761define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 762; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 763; SSE2: # BB#0: 764; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 765; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 766; SSE2-NEXT: por %xmm1, %xmm0 767; SSE2-NEXT: retq 768; 769; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 770; SSSE3: # BB#0: 771; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 772; SSSE3-NEXT: retq 773; 774; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 775; SSE41: # BB#0: 776; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 777; SSE41-NEXT: retq 778; 779; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 780; AVX: # BB#0: 781; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 782; AVX-NEXT: retq 783 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 784 ret <16 x i8> %shuffle 785} 786 787define <16 x i8> 
@shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) { 788; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 789; SSE2: # BB#0: 790; SSE2-NEXT: movdqa %xmm0, %xmm1 791; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 792; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 793; SSE2-NEXT: por %xmm1, %xmm0 794; SSE2-NEXT: retq 795; 796; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 797; SSSE3: # BB#0: 798; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 799; SSSE3-NEXT: retq 800; 801; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 802; SSE41: # BB#0: 803; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 804; SSE41-NEXT: retq 805; 806; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14: 807; AVX: # BB#0: 808; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 809; AVX-NEXT: retq 810 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 811 ret <16 x i8> %shuffle 812} 813 814define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) { 815; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 816; SSE2: # BB#0: 817; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 818; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 819; SSE2-NEXT: por %xmm1, %xmm0 820; SSE2-NEXT: retq 821; 822; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 823; SSSE3: # BB#0: 824; SSSE3-NEXT: palignr {{.*#+}} xmm0 = 
xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 825; SSSE3-NEXT: retq 826; 827; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 828; SSE41: # BB#0: 829; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 830; SSE41-NEXT: retq 831; 832; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00: 833; AVX: # BB#0: 834; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0] 835; AVX-NEXT: retq 836 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0> 837 ret <16 x i8> %shuffle 838} 839 840define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) { 841; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 842; SSE2: # BB#0: 843; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 844; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] 845; SSE2-NEXT: por %xmm1, %xmm0 846; SSE2-NEXT: retq 847; 848; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 849; SSSE3: # BB#0: 850; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 851; SSSE3-NEXT: movdqa %xmm1, %xmm0 852; SSSE3-NEXT: retq 853; 854; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 855; SSE41: # BB#0: 856; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 857; SSE41-NEXT: movdqa %xmm1, %xmm0 858; SSE41-NEXT: retq 859; 860; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16: 861; AVX: # BB#0: 862; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] 863; AVX-NEXT: retq 864 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x 
i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 865 ret <16 x i8> %shuffle 866} 867 868define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) { 869; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 870; SSE2: # BB#0: 871; SSE2-NEXT: movdqa %xmm0, %xmm1 872; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 873; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] 874; SSE2-NEXT: por %xmm1, %xmm0 875; SSE2-NEXT: retq 876; 877; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 878; SSSE3: # BB#0: 879; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 880; SSSE3-NEXT: retq 881; 882; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 883; SSE41: # BB#0: 884; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 885; SSE41-NEXT: retq 886; 887; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00: 888; AVX: # BB#0: 889; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] 890; AVX-NEXT: retq 891 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0> 892 ret <16 x i8> %shuffle 893} 894 895define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) { 896; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 897; SSE2: # BB#0: 898; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 899; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 900; SSE2-NEXT: por %xmm1, %xmm0 901; SSE2-NEXT: retq 
902; 903; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 904; SSSE3: # BB#0: 905; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 906; SSSE3-NEXT: movdqa %xmm1, %xmm0 907; SSSE3-NEXT: retq 908; 909; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 910; SSE41: # BB#0: 911; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 912; SSE41-NEXT: movdqa %xmm1, %xmm0 913; SSE41-NEXT: retq 914; 915; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30: 916; AVX: # BB#0: 917; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 918; AVX-NEXT: retq 919 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30> 920 ret <16 x i8> %shuffle 921} 922 923define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) { 924; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 925; SSE2: # BB#0: 926; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 927; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 928; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 929; SSE2-NEXT: retq 930; 931; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 932; SSSE3: # BB#0: 933; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 934; SSSE3-NEXT: retq 935; 936; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 937; SSE41: # BB#0: 938; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 939; SSE41-NEXT: retq 940; 941; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu: 942; AVX: # BB#0: 943; AVX-NEXT: 
vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 944; AVX-NEXT: retq 945 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 946 ret <16 x i8> %shuffle 947} 948 949define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) { 950; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 951; SSE2: # BB#0: 952; SSE2-NEXT: pxor %xmm1, %xmm1 953; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 954; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 955; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 956; SSE2-NEXT: retq 957; 958; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 959; SSSE3: # BB#0: 960; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 961; SSSE3-NEXT: retq 962; 963; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 964; SSE41: # BB#0: 965; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 966; SSE41-NEXT: retq 967; 968; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz: 969; AVX: # BB#0: 970; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 971; AVX-NEXT: retq 972 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 973 ret <16 
x i8> %shuffle 974} 975 976define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) { 977; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 978; SSE2: # BB#0: 979; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 980; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 981; SSE2-NEXT: retq 982; 983; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 984; SSSE3: # BB#0: 985; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 986; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 987; SSSE3-NEXT: retq 988; 989; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 990; SSE41: # BB#0: 991; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 992; SSE41-NEXT: retq 993; 994; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu: 995; AVX: # BB#0: 996; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 997; AVX-NEXT: retq 998 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef> 999 ret <16 x i8> %shuffle 1000} 1001 1002define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) { 1003; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1004; SSE2: # BB#0: 1005; SSE2-NEXT: pxor %xmm1, %xmm1 1006; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1007; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1008; SSE2-NEXT: retq 1009; 
1010; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1011; SSSE3: # BB#0: 1012; SSSE3-NEXT: pxor %xmm1, %xmm1 1013; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1014; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1015; SSSE3-NEXT: retq 1016; 1017; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1018; SSE41: # BB#0: 1019; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1020; SSE41-NEXT: retq 1021; 1022; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz: 1023; AVX: # BB#0: 1024; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1025; AVX-NEXT: retq 1026 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31> 1027 ret <16 x i8> %shuffle 1028} 1029 1030define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) { 1031; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1032; SSE2: # BB#0: 1033; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1034; SSE2-NEXT: retq 1035; 1036; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1037; SSSE3: # BB#0: 1038; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1039; SSSE3-NEXT: retq 1040; 1041; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1042; SSE41: # BB#0: 1043; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1044; 
SSE41-NEXT: retq 1045; 1046; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu: 1047; AVX: # BB#0: 1048; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1049; AVX-NEXT: retq 1050 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef> 1051 ret <16 x i8> %shuffle 1052} 1053 1054define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) { 1055; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1056; SSE2: # BB#0: 1057; SSE2-NEXT: pxor %xmm1, %xmm1 1058; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1059; SSE2-NEXT: retq 1060; 1061; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1062; SSSE3: # BB#0: 1063; SSSE3-NEXT: pxor %xmm1, %xmm1 1064; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1065; SSSE3-NEXT: retq 1066; 1067; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1068; SSE41: # BB#0: 1069; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1070; SSE41-NEXT: retq 1071; 1072; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz: 1073; AVX: # BB#0: 1074; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1075; AVX-NEXT: retq 1076 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 
21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31> 1077 ret <16 x i8> %shuffle 1078} 1079 1080define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) { 1081; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1082; SSE2: # BB#0: # %entry 1083; SSE2-NEXT: pxor %xmm2, %xmm2 1084; SSE2-NEXT: movdqa %xmm0, %xmm3 1085; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 1086; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,3,0,1] 1087; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,2,2,4,5,6,7] 1088; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,7,7] 1089; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535] 1090; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1091; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3] 1092; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,3,4,5,6,7] 1093; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,4] 1094; SSE2-NEXT: pand %xmm5, %xmm2 1095; SSE2-NEXT: pandn %xmm4, %xmm5 1096; SSE2-NEXT: por %xmm2, %xmm5 1097; SSE2-NEXT: psrlq $16, %xmm3 1098; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] 1099; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3] 1100; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7] 1101; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4] 1102; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 1103; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1104; SSE2-NEXT: packuswb %xmm5, %xmm2 1105; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255] 1106; SSE2-NEXT: pand %xmm0, %xmm2 1107; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,3,1,1,4,5,6,7] 1108; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 
1109; SSE2-NEXT: pandn %xmm1, %xmm0 1110; SSE2-NEXT: por %xmm2, %xmm0 1111; SSE2-NEXT: retq 1112; 1113; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1114; SSSE3: # BB#0: # %entry 1115; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1116; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1117; SSSE3-NEXT: por %xmm1, %xmm0 1118; SSSE3-NEXT: retq 1119; 1120; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1121; SSE41: # BB#0: # %entry 1122; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1123; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1124; SSE41-NEXT: por %xmm1, %xmm0 1125; SSE41-NEXT: retq 1126; 1127; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00: 1128; AVX: # BB#0: # %entry 1129; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero 1130; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] 1131; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 1132; AVX-NEXT: retq 1133entry: 1134 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0> 1135 1136 ret <16 x i8> %shuffle 1137} 1138 1139define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) { 1140; Nothing interesting to test here. Just make sure we didn't crash. 
; ALL-LABEL: stress_test2:
; ALL: retq
entry:
  %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
  %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
  %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>

  ret <16 x i8> %s.2.0
}

; Every lane selected by the shuffle below is a known zero: lanes 12-15 of the
; constant first operand are i8 0 and the remaining lanes come from a bitcast
; zeroinitializer. The checks expect the result to fold to a plain zero vector,
; so one zeroed register feeds both the store to %ptr1 and the store to %ptr2.
define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
; SSE-LABEL: constant_gets_selected:
; SSE: # BB#0: # %entry
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, (%rdi)
; SSE-NEXT: movaps %xmm0, (%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: constant_gets_selected:
; AVX: # BB#0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %xmm0, (%rdi)
; AVX-NEXT: vmovaps %xmm0, (%rsi)
; AVX-NEXT: retq
entry:
  %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
  %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
  %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
  store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16
  store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16
  ret void
}

;
; Shuffle to logical bit shifts
;
; In the function names below, "zz" marks a lane taken from the
; zeroinitializer operand (mask index 16) and "uu" marks an undef mask lane.
;

; Each 16-bit lane keeps its low source byte in the high position with a zero
; byte below it; the checks expect a single left shift of the words by 8.
define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; SSE: # BB#0:
; SSE-NEXT: psllw $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
; AVX: # BB#0:
; AVX-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
  ret <16 x i8> %shuffle
}

; Each 32-bit lane keeps only its lowest source byte, moved to the top byte
; with three zero bytes below; the checks expect a dword left shift by 24.
define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; SSE: # BB#0:
; SSE-NEXT: pslld $24, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
; AVX: # BB#0:
; AVX-NEXT: vpslld $24, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
  ret <16 x i8> %shuffle
}

; Each 64-bit lane keeps only its lowest source byte, moved to the top byte
; with seven zero bytes below; the checks expect a qword left shift by 56.
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
; SSE: # BB#0:
; SSE-NEXT: psllq $56, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
  ret <16 x i8> %shuffle
}

; Same idea with undef lanes mixed in: every defined lane agrees with a qword
; left shift by one byte, and the checks expect exactly that single shift.
define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
; SSE: # BB#0:
; SSE-NEXT: psllq $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
; AVX: # BB#0:
; AVX-NEXT: vpsllq $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

; Right-shift counterpart: the defined lanes place the high byte of each
; 16-bit lane in the low position with zero above it. The undef lanes leave
; the rest unconstrained, and the checks expect a word right shift by 8.
define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
; SSE: # BB#0:
; SSE-NEXT: psrlw $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
  ret <16 x i8> %shuffle
}

; The defined lanes move the upper two bytes of each 32-bit lane to the bottom
; and zero the top two; the checks expect a dword right shift by 16.
define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
  ret <16 x i8> %shuffle
}

; The defined lanes move the top byte of each 64-bit lane to the bottom and
; zero the bytes above it; the checks expect a qword right shift by 56.
define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
; SSE: # BB#0:
; SSE-NEXT: psrlq $56, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
  ret <16 x i8> %shuffle
}

; Selects the even-numbered bytes of both inputs (all 32 mask indices are
; even). With plain SSE2 the checks expect a mask-and-pack sequence; with
; SSSE3 and later they expect two byte shuffles joined by an unpack.
; NOTE(review): the name presumably refers to a bug report (PR12412) that is
; not visible from this file; confirm before relying on that.
define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
; SSE2-LABEL: PR12412:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR12412:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR12412:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: PR12412:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
entry:
  %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ret <16 x i8> %0
}

; Bytes 2 and 3 of each 32-bit lane land in positions 1 and 2, the top byte is
; zero and the bottom byte is undef; the checks expect a dword right shift by
; 8, which the undef bottom byte permits.
define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
; SSE: # BB#0:
; SSE-NEXT: psrld $8, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
  ret <16 x i8> %shuffle
}

; Three shuffles chained through bitcasts: a reversed byte interleave of the
; low halves of %a and %b, then a reversal of the four 32-bit elements, then a
; swap within each pair of 16-bit elements. Together they compose to the plain
; low-half byte interleave, and the checks expect one unpack instruction.
define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
; SSE: # BB#0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX-NEXT: retq
  %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
  %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
  %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
  ret <16 x i8> %bitcast8
}

; Splat of byte 0 of an i32 loaded from memory and inserted into lane 0 of a
; zero vector. With AVX2 the checks expect the load to fold into a byte
; broadcast from (%rdi); the other configurations load with movd and then
; splat in registers.
define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_mem_v16i8_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v16i8_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v16i8_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v16i8_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp3
}

; Splat of byte 0 of an i8 that is loaded and sign-extended to i32 before the
; insert. Byte 0 of the extended value is the loaded byte itself, and with
; AVX2 the checks expect a direct byte broadcast from (%rdi); the other
; configurations go through movsbl and movd before splatting.
define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE2: # BB#0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
; AVX1: # BB#0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp4
}

; Splat of byte 1 of a loaded i32. With AVX2 the checks expect the element
; offset to fold into the address of the broadcast, 1(%rdi).
define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i8_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %tmp3
}

; Splat of byte 2 of a loaded i32. With AVX2 the checks expect the element
; offset to fold into the address of the broadcast, 2(%rdi).
define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
; SSE41: # BB#0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0
; AVX2-NEXT: retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <16 x i8> %tmp3
}

; Splat of byte 1 of a sign-extended i8 load. That byte is produced by the
; extension rather than read from memory, and with AVX2 the checks expect no
; memory-operand broadcast here: movsbl, a shift right by 8, a move into a
; vector register, then a register broadcast.
define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; SSE2: # BB#0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; AVX1: # BB#0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
; AVX2: # BB#0:
; AVX2-NEXT: movsbl (%rdi), %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %tmp4
}

; Splat of byte 2 of a sign-extended i8 load. As with the byte 1 variant, the
; byte is produced by the extension, and with AVX2 the checks expect movsbl, a
; shift right by 16, a move into a vector register, then a register broadcast.
define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSE2: # BB#0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; SSE41: # BB#0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX1: # BB#0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
; AVX2: # BB#0:
; AVX2-NEXT: movsbl (%rdi), %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <16 x i8> %tmp4
}