1; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 2; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 3 4target triple = "x86_64-unknown-unknown" 5 6define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) { 7; AVX1-LABEL: shuffle_v8f32_00000000: 8; AVX1: # BB#0: 9; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 10; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 11; AVX1-NEXT: retq 12; 13; AVX2-LABEL: shuffle_v8f32_00000000: 14; AVX2: # BB#0: 15; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 16; AVX2-NEXT: retq 17 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 18 ret <8 x float> %shuffle 19} 20 21define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) { 22; AVX1-LABEL: shuffle_v8f32_00000010: 23; AVX1: # BB#0: 24; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 25; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 26; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 27; AVX1-NEXT: retq 28; 29; AVX2-LABEL: shuffle_v8f32_00000010: 30; AVX2: # BB#0: 31; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 32; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 33; AVX2-NEXT: retq 34 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 35 ret <8 x float> %shuffle 36} 37 38define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) { 39; AVX1-LABEL: shuffle_v8f32_00000200: 40; AVX1: # BB#0: 41; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 42; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 43; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 44; AVX1-NEXT: retq 45; 46; AVX2-LABEL: shuffle_v8f32_00000200: 47; AVX2: # BB#0: 48; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 49; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 50; AVX2-NEXT: retq 51 %shuffle = shufflevector <8 x float> %a, <8 x 
float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0> 52 ret <8 x float> %shuffle 53} 54 55define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) { 56; AVX1-LABEL: shuffle_v8f32_00003000: 57; AVX1: # BB#0: 58; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 59; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 60; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 61; AVX1-NEXT: retq 62; 63; AVX2-LABEL: shuffle_v8f32_00003000: 64; AVX2: # BB#0: 65; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 66; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 67; AVX2-NEXT: retq 68 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 69 ret <8 x float> %shuffle 70} 71 72define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) { 73; AVX1-LABEL: shuffle_v8f32_00040000: 74; AVX1: # BB#0: 75; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 76; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 77; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 78; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 79; AVX1-NEXT: retq 80; 81; AVX2-LABEL: shuffle_v8f32_00040000: 82; AVX2: # BB#0: 83; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 84; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 85; AVX2-NEXT: retq 86 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 87 ret <8 x float> %shuffle 88} 89 90define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) { 91; AVX1-LABEL: shuffle_v8f32_00500000: 92; AVX1: # BB#0: 93; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 94; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 95; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 96; AVX1-NEXT: retq 97; 98; AVX2-LABEL: shuffle_v8f32_00500000: 99; AVX2: # BB#0: 100; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 101; 
AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 102; AVX2-NEXT: retq 103 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 104 ret <8 x float> %shuffle 105} 106 107define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) { 108; AVX1-LABEL: shuffle_v8f32_06000000: 109; AVX1: # BB#0: 110; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 111; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 112; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 113; AVX1-NEXT: retq 114; 115; AVX2-LABEL: shuffle_v8f32_06000000: 116; AVX2: # BB#0: 117; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 118; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 119; AVX2-NEXT: retq 120 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 121 ret <8 x float> %shuffle 122} 123 124define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { 125; AVX1-LABEL: shuffle_v8f32_70000000: 126; AVX1: # BB#0: 127; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 128; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 129; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 130; AVX1-NEXT: retq 131; 132; AVX2-LABEL: shuffle_v8f32_70000000: 133; AVX2: # BB#0: 134; AVX2-NEXT: movl $7, %eax 135; AVX2-NEXT: vmovd %eax, %xmm1 136; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 137; AVX2-NEXT: retq 138 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 139 ret <8 x float> %shuffle 140} 141 142define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) { 143; ALL-LABEL: shuffle_v8f32_01014545: 144; ALL: # BB#0: 145; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 146; ALL-NEXT: retq 147 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5> 148 ret <8 x float> %shuffle 149} 
150 151define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) { 152; AVX1-LABEL: shuffle_v8f32_00112233: 153; AVX1: # BB#0: 154; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1] 155; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3] 156; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 157; AVX1-NEXT: retq 158; 159; AVX2-LABEL: shuffle_v8f32_00112233: 160; AVX2: # BB#0: 161; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 162; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 163; AVX2-NEXT: retq 164 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 165 ret <8 x float> %shuffle 166} 167 168define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) { 169; AVX1-LABEL: shuffle_v8f32_00001111: 170; AVX1: # BB#0: 171; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 172; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 173; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 174; AVX1-NEXT: retq 175; 176; AVX2-LABEL: shuffle_v8f32_00001111: 177; AVX2: # BB#0: 178; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 179; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 180; AVX2-NEXT: retq 181 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 182 ret <8 x float> %shuffle 183} 184 185define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) { 186; ALL-LABEL: shuffle_v8f32_81a3c5e7: 187; ALL: # BB#0: 188; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 189; ALL-NEXT: retq 190 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 191 ret <8 x float> %shuffle 192} 193 194define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) { 195; AVX1-LABEL: shuffle_v8f32_08080808: 196; AVX1: # BB#0: 197; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 198; 
AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 199; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 200; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 201; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 202; AVX1-NEXT: retq 203; 204; AVX2-LABEL: shuffle_v8f32_08080808: 205; AVX2: # BB#0: 206; AVX2-NEXT: vbroadcastss %xmm1, %ymm1 207; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 208; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 209; AVX2-NEXT: retq 210 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 211 ret <8 x float> %shuffle 212} 213 214define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) { 215; ALL-LABEL: shuffle_v8f32_08084c4c: 216; ALL: # BB#0: 217; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 218; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 219; ALL-NEXT: retq 220 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 221 ret <8 x float> %shuffle 222} 223 224define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) { 225; ALL-LABEL: shuffle_v8f32_8823cc67: 226; ALL: # BB#0: 227; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 228; ALL-NEXT: retq 229 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 230 ret <8 x float> %shuffle 231} 232 233define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) { 234; ALL-LABEL: shuffle_v8f32_9832dc76: 235; ALL: # BB#0: 236; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 237; ALL-NEXT: retq 238 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 239 ret <8 x float> %shuffle 240} 241 
242define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) { 243; ALL-LABEL: shuffle_v8f32_9810dc54: 244; ALL: # BB#0: 245; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 246; ALL-NEXT: retq 247 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 248 ret <8 x float> %shuffle 249} 250 251define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) { 252; ALL-LABEL: shuffle_v8f32_08194c5d: 253; ALL: # BB#0: 254; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 255; ALL-NEXT: retq 256 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 257 ret <8 x float> %shuffle 258} 259 260define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) { 261; ALL-LABEL: shuffle_v8f32_2a3b6e7f: 262; ALL: # BB#0: 263; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 264; ALL-NEXT: retq 265 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 266 ret <8 x float> %shuffle 267} 268 269define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) { 270; AVX1-LABEL: shuffle_v8f32_08192a3b: 271; AVX1: # BB#0: 272; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 273; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 274; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 275; AVX1-NEXT: retq 276; 277; AVX2-LABEL: shuffle_v8f32_08192a3b: 278; AVX2: # BB#0: 279; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 280; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 281; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 282; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 283; AVX2-NEXT: vblendps {{.*#+}} ymm0 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 284; AVX2-NEXT: retq 285 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 286 ret <8 x float> %shuffle 287} 288 289define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) { 290; AVX1-LABEL: shuffle_v8f32_08991abb: 291; AVX1: # BB#0: 292; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 293; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 294; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 295; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 296; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 297; AVX1-NEXT: retq 298; 299; AVX2-LABEL: shuffle_v8f32_08991abb: 300; AVX2: # BB#0: 301; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 302; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 303; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 304; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 305; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 306; AVX2-NEXT: retq 307 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 308 ret <8 x float> %shuffle 309} 310 311define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) { 312; AVX1-LABEL: shuffle_v8f32_091b2d3f: 313; AVX1: # BB#0: 314; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 315; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 316; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 317; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 318; AVX1-NEXT: retq 319; 320; AVX2-LABEL: shuffle_v8f32_091b2d3f: 321; AVX2: # BB#0: 322; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u> 323; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 324; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 325; AVX2-NEXT: retq 326 %shuffle = shufflevector <8 x 
float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 327 ret <8 x float> %shuffle 328} 329 330define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) { 331; AVX1-LABEL: shuffle_v8f32_09ab1def: 332; AVX1: # BB#0: 333; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 334; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 335; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 336; AVX1-NEXT: retq 337; 338; AVX2-LABEL: shuffle_v8f32_09ab1def: 339; AVX2: # BB#0: 340; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 341; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 342; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 343; AVX2-NEXT: retq 344 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 345 ret <8 x float> %shuffle 346} 347 348define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) { 349; ALL-LABEL: shuffle_v8f32_00014445: 350; ALL: # BB#0: 351; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 352; ALL-NEXT: retq 353 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 354 ret <8 x float> %shuffle 355} 356 357define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) { 358; ALL-LABEL: shuffle_v8f32_00204464: 359; ALL: # BB#0: 360; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 361; ALL-NEXT: retq 362 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 363 ret <8 x float> %shuffle 364} 365 366define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) { 367; ALL-LABEL: shuffle_v8f32_03004744: 368; ALL: # BB#0: 369; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 370; ALL-NEXT: retq 371 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, 
i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 372 ret <8 x float> %shuffle 373} 374 375define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) { 376; ALL-LABEL: shuffle_v8f32_10005444: 377; ALL: # BB#0: 378; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 379; ALL-NEXT: retq 380 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 381 ret <8 x float> %shuffle 382} 383 384define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) { 385; ALL-LABEL: shuffle_v8f32_22006644: 386; ALL: # BB#0: 387; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 388; ALL-NEXT: retq 389 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 390 ret <8 x float> %shuffle 391} 392 393define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) { 394; ALL-LABEL: shuffle_v8f32_33307774: 395; ALL: # BB#0: 396; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 397; ALL-NEXT: retq 398 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 399 ret <8 x float> %shuffle 400} 401 402define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) { 403; ALL-LABEL: shuffle_v8f32_32107654: 404; ALL: # BB#0: 405; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 406; ALL-NEXT: retq 407 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 408 ret <8 x float> %shuffle 409} 410 411define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) { 412; ALL-LABEL: shuffle_v8f32_00234467: 413; ALL: # BB#0: 414; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 415; ALL-NEXT: retq 416 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 417 ret <8 x 
float> %shuffle 418} 419 420define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) { 421; ALL-LABEL: shuffle_v8f32_00224466: 422; ALL: # BB#0: 423; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 424; ALL-NEXT: retq 425 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 426 ret <8 x float> %shuffle 427} 428 429define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) { 430; ALL-LABEL: shuffle_v8f32_10325476: 431; ALL: # BB#0: 432; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 433; ALL-NEXT: retq 434 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 435 ret <8 x float> %shuffle 436} 437 438define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) { 439; ALL-LABEL: shuffle_v8f32_11335577: 440; ALL: # BB#0: 441; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 442; ALL-NEXT: retq 443 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 444 ret <8 x float> %shuffle 445} 446 447define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) { 448; ALL-LABEL: shuffle_v8f32_10235467: 449; ALL: # BB#0: 450; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 451; ALL-NEXT: retq 452 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 453 ret <8 x float> %shuffle 454} 455 456define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) { 457; ALL-LABEL: shuffle_v8f32_10225466: 458; ALL: # BB#0: 459; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 460; ALL-NEXT: retq 461 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 462 ret <8 x float> %shuffle 463} 464 465define <8 x float> 
@shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { 466; ALL-LABEL: shuffle_v8f32_00015444: 467; ALL: # BB#0: 468; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 469; ALL-NEXT: retq 470 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 471 ret <8 x float> %shuffle 472} 473 474define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { 475; ALL-LABEL: shuffle_v8f32_00204644: 476; ALL: # BB#0: 477; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 478; ALL-NEXT: retq 479 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 480 ret <8 x float> %shuffle 481} 482 483define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { 484; ALL-LABEL: shuffle_v8f32_03004474: 485; ALL: # BB#0: 486; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 487; ALL-NEXT: retq 488 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 489 ret <8 x float> %shuffle 490} 491 492define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { 493; ALL-LABEL: shuffle_v8f32_10004444: 494; ALL: # BB#0: 495; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 496; ALL-NEXT: retq 497 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 498 ret <8 x float> %shuffle 499} 500 501define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { 502; ALL-LABEL: shuffle_v8f32_22006446: 503; ALL: # BB#0: 504; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 505; ALL-NEXT: retq 506 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 507 ret <8 x float> %shuffle 508} 509 510define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { 511; 
ALL-LABEL: shuffle_v8f32_33307474: 512; ALL: # BB#0: 513; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 514; ALL-NEXT: retq 515 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 516 ret <8 x float> %shuffle 517} 518 519define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { 520; ALL-LABEL: shuffle_v8f32_32104567: 521; ALL: # BB#0: 522; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 523; ALL-NEXT: retq 524 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 525 ret <8 x float> %shuffle 526} 527 528define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { 529; ALL-LABEL: shuffle_v8f32_00236744: 530; ALL: # BB#0: 531; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 532; ALL-NEXT: retq 533 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 534 ret <8 x float> %shuffle 535} 536 537define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { 538; ALL-LABEL: shuffle_v8f32_00226644: 539; ALL: # BB#0: 540; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 541; ALL-NEXT: retq 542 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 543 ret <8 x float> %shuffle 544} 545 546define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { 547; ALL-LABEL: shuffle_v8f32_10324567: 548; ALL: # BB#0: 549; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 550; ALL-NEXT: retq 551 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 552 ret <8 x float> %shuffle 553} 554 555define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { 556; ALL-LABEL: shuffle_v8f32_11334567: 557; ALL: # BB#0: 558; 
ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 559; ALL-NEXT: retq 560 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> 561 ret <8 x float> %shuffle 562} 563 564define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { 565; ALL-LABEL: shuffle_v8f32_01235467: 566; ALL: # BB#0: 567; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 568; ALL-NEXT: retq 569 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 570 ret <8 x float> %shuffle 571} 572 573define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { 574; ALL-LABEL: shuffle_v8f32_01235466: 575; ALL: # BB#0: 576; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 577; ALL-NEXT: retq 578 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 579 ret <8 x float> %shuffle 580} 581 582define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { 583; ALL-LABEL: shuffle_v8f32_002u6u44: 584; ALL: # BB#0: 585; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 586; ALL-NEXT: retq 587 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 588 ret <8 x float> %shuffle 589} 590 591define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { 592; ALL-LABEL: shuffle_v8f32_00uu66uu: 593; ALL: # BB#0: 594; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 595; ALL-NEXT: retq 596 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 597 ret <8 x float> %shuffle 598} 599 600define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { 601; ALL-LABEL: shuffle_v8f32_103245uu: 602; ALL: # BB#0: 603; ALL-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[1,0,3,2,4,5,u,u] 604; ALL-NEXT: retq 605 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 606 ret <8 x float> %shuffle 607} 608 609define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { 610; ALL-LABEL: shuffle_v8f32_1133uu67: 611; ALL: # BB#0: 612; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 613; ALL-NEXT: retq 614 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 615 ret <8 x float> %shuffle 616} 617 618define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { 619; ALL-LABEL: shuffle_v8f32_0uu354uu: 620; ALL: # BB#0: 621; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 622; ALL-NEXT: retq 623 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 624 ret <8 x float> %shuffle 625} 626 627define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { 628; ALL-LABEL: shuffle_v8f32_uuu3uu66: 629; ALL: # BB#0: 630; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 631; ALL-NEXT: retq 632 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 633 ret <8 x float> %shuffle 634} 635 636define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) { 637; AVX1-LABEL: shuffle_v8f32_c348cda0: 638; AVX1: # BB#0: 639; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 640; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4] 641; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 642; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4] 643; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3] 644; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 645; AVX1-NEXT: retq 646; 647; 
AVX2-LABEL: shuffle_v8f32_c348cda0: 648; AVX2: # BB#0: 649; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0> 650; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 651; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u> 652; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 653; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7] 654; AVX2-NEXT: retq 655 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0> 656 ret <8 x float> %shuffle 657} 658 659define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) { 660; AVX1-LABEL: shuffle_v8f32_f511235a: 661; AVX1: # BB#0: 662; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 663; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2] 664; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5] 665; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3] 666; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] 667; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6] 668; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 669; AVX1-NEXT: retq 670; 671; AVX2-LABEL: shuffle_v8f32_f511235a: 672; AVX2: # BB#0: 673; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2> 674; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 675; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u> 676; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0 677; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7] 678; AVX2-NEXT: retq 679 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10> 680 ret <8 x float> %shuffle 681} 682 683define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) { 684; AVX1-LABEL: shuffle_v8f32_32103210: 685; AVX1: # BB#0: 686; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 687; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 688; AVX1-NEXT: retq 689; 690; AVX2-LABEL: shuffle_v8f32_32103210: 691; AVX2: # 
BB#0: 692; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 693; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 694; AVX2-NEXT: retq 695 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 696 ret <8 x float> %shuffle 697} 698 699define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) { 700; AVX1-LABEL: shuffle_v8f32_76547654: 701; AVX1: # BB#0: 702; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 703; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 704; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 705; AVX1-NEXT: retq 706; 707; AVX2-LABEL: shuffle_v8f32_76547654: 708; AVX2: # BB#0: 709; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 710; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 711; AVX2-NEXT: retq 712 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 713 ret <8 x float> %shuffle 714} 715 716define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) { 717; AVX1-LABEL: shuffle_v8f32_76543210: 718; AVX1: # BB#0: 719; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 720; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 721; AVX1-NEXT: retq 722; 723; AVX2-LABEL: shuffle_v8f32_76543210: 724; AVX2: # BB#0: 725; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 726; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 727; AVX2-NEXT: retq 728 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 729 ret <8 x float> %shuffle 730} 731 732define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) { 733; ALL-LABEL: shuffle_v8f32_3210ba98: 734; ALL: # BB#0: 735; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 736; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 737; ALL-NEXT: retq 738 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 
8> 739 ret <8 x float> %shuffle 740} 741 742define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { 743; ALL-LABEL: shuffle_v8f32_3210fedc: 744; ALL: # BB#0: 745; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 746; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 747; ALL-NEXT: retq 748 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 749 ret <8 x float> %shuffle 750} 751 752define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { 753; ALL-LABEL: shuffle_v8f32_7654fedc: 754; ALL: # BB#0: 755; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 756; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 757; ALL-NEXT: retq 758 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 759 ret <8 x float> %shuffle 760} 761 762define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { 763; ALL-LABEL: shuffle_v8f32_fedc7654: 764; ALL: # BB#0: 765; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 766; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 767; ALL-NEXT: retq 768 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 769 ret <8 x float> %shuffle 770} 771 772define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) { 773; AVX1-LABEL: PR21138: 774; AVX1: # BB#0: 775; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 776; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 777; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 778; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 779; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] 780; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 781; AVX1-NEXT: retq 782; 783; AVX2-LABEL: PR21138: 784; AVX2: # BB#0: 785; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7> 786; AVX2-NEXT: vpermps 
%ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <8 x float> %shuffle
}

; Low half of %b reversed (elements 11..8), then high half of %a reversed
; (elements 7..4).
define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba987654:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
  ret <8 x float> %shuffle
}

; Low half of %b reversed (elements 11..8), then low half of %a reversed
; (elements 3..0), as the function name spells out. The mask used to be a copy
; of the ba987654 case above, so this pattern was never exercised.
; NOTE(review): the expected instructions are the operand-swapped mirror of the
; shuffle_v8f32_3210ba98 case; regenerate them with llc if the output differs.
define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba983210:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %shuffle
}

; Interleaves elements 0 and 1 of each 128-bit half, %b first then %a; result
; elements 2 and 6 are undef in the mask.
define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_80u1c4u5:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; ALL-NEXT:    retq
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
; ALL-NEXT:    retq
  %shuffle =
shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> 830 ret <8 x float> %shuffle 831} 832 833define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { 834; AVX1-LABEL: shuffle_v8i32_00000000: 835; AVX1: # BB#0: 836; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 837; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 838; AVX1-NEXT: retq 839; 840; AVX2-LABEL: shuffle_v8i32_00000000: 841; AVX2: # BB#0: 842; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 843; AVX2-NEXT: retq 844 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 845 ret <8 x i32> %shuffle 846} 847 848define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) { 849; AVX1-LABEL: shuffle_v8i32_00000010: 850; AVX1: # BB#0: 851; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 852; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0] 853; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 854; AVX1-NEXT: retq 855; 856; AVX2-LABEL: shuffle_v8i32_00000010: 857; AVX2: # BB#0: 858; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0] 859; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 860; AVX2-NEXT: retq 861 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0> 862 ret <8 x i32> %shuffle 863} 864 865define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) { 866; AVX1-LABEL: shuffle_v8i32_00000200: 867; AVX1: # BB#0: 868; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 869; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0] 870; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 871; AVX1-NEXT: retq 872; 873; AVX2-LABEL: shuffle_v8i32_00000200: 874; AVX2: # BB#0: 875; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0] 876; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 877; AVX2-NEXT: retq 878 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, 
i32 0> 879 ret <8 x i32> %shuffle 880} 881 882define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) { 883; AVX1-LABEL: shuffle_v8i32_00003000: 884; AVX1: # BB#0: 885; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 886; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0] 887; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 888; AVX1-NEXT: retq 889; 890; AVX2-LABEL: shuffle_v8i32_00003000: 891; AVX2: # BB#0: 892; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0] 893; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 894; AVX2-NEXT: retq 895 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0> 896 ret <8 x i32> %shuffle 897} 898 899define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) { 900; AVX1-LABEL: shuffle_v8i32_00040000: 901; AVX1: # BB#0: 902; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 903; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4] 904; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7] 905; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7] 906; AVX1-NEXT: retq 907; 908; AVX2-LABEL: shuffle_v8i32_00040000: 909; AVX2: # BB#0: 910; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] 911; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 912; AVX2-NEXT: retq 913 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0> 914 ret <8 x i32> %shuffle 915} 916 917define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) { 918; AVX1-LABEL: shuffle_v8i32_00500000: 919; AVX1: # BB#0: 920; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 921; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] 922; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4] 923; AVX1-NEXT: retq 924; 925; AVX2-LABEL: shuffle_v8i32_00500000: 926; AVX2: # BB#0: 927; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] 928; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 929; AVX2-NEXT: retq 
930 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0> 931 ret <8 x i32> %shuffle 932} 933 934define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) { 935; AVX1-LABEL: shuffle_v8i32_06000000: 936; AVX1: # BB#0: 937; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 938; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 939; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4] 940; AVX1-NEXT: retq 941; 942; AVX2-LABEL: shuffle_v8i32_06000000: 943; AVX2: # BB#0: 944; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] 945; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 946; AVX2-NEXT: retq 947 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 948 ret <8 x i32> %shuffle 949} 950 951define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { 952; AVX1-LABEL: shuffle_v8i32_70000000: 953; AVX1: # BB#0: 954; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 955; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] 956; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4] 957; AVX1-NEXT: retq 958; 959; AVX2-LABEL: shuffle_v8i32_70000000: 960; AVX2: # BB#0: 961; AVX2-NEXT: movl $7, %eax 962; AVX2-NEXT: vmovd %eax, %xmm1 963; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 964; AVX2-NEXT: retq 965 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 966 ret <8 x i32> %shuffle 967} 968 969define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) { 970; AVX1-LABEL: shuffle_v8i32_01014545: 971; AVX1: # BB#0: 972; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 973; AVX1-NEXT: retq 974; 975; AVX2-LABEL: shuffle_v8i32_01014545: 976; AVX2: # BB#0: 977; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 978; AVX2-NEXT: retq 979 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 
5, i32 4, i32 5> 980 ret <8 x i32> %shuffle 981} 982 983define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) { 984; AVX1-LABEL: shuffle_v8i32_00112233: 985; AVX1: # BB#0: 986; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1] 987; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] 988; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 989; AVX1-NEXT: retq 990; 991; AVX2-LABEL: shuffle_v8i32_00112233: 992; AVX2: # BB#0: 993; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] 994; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 995; AVX2-NEXT: retq 996 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3> 997 ret <8 x i32> %shuffle 998} 999 1000define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) { 1001; AVX1-LABEL: shuffle_v8i32_00001111: 1002; AVX1: # BB#0: 1003; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0] 1004; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1005; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1006; AVX1-NEXT: retq 1007; 1008; AVX2-LABEL: shuffle_v8i32_00001111: 1009; AVX2: # BB#0: 1010; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1] 1011; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1012; AVX2-NEXT: retq 1013 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> 1014 ret <8 x i32> %shuffle 1015} 1016 1017define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) { 1018; AVX1-LABEL: shuffle_v8i32_81a3c5e7: 1019; AVX1: # BB#0: 1020; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1021; AVX1-NEXT: retq 1022; 1023; AVX2-LABEL: shuffle_v8i32_81a3c5e7: 1024; AVX2: # BB#0: 1025; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7] 1026; AVX2-NEXT: retq 1027 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7> 1028 ret 
<8 x i32> %shuffle 1029} 1030 1031define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { 1032; AVX1-LABEL: shuffle_v8i32_08080808: 1033; AVX1: # BB#0: 1034; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0] 1035; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 1036; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 1037; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1038; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1039; AVX1-NEXT: retq 1040; 1041; AVX2-LABEL: shuffle_v8i32_08080808: 1042; AVX2: # BB#0: 1043; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 1044; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 1045; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1046; AVX2-NEXT: retq 1047 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8> 1048 ret <8 x i32> %shuffle 1049} 1050 1051define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) { 1052; AVX1-LABEL: shuffle_v8i32_08084c4c: 1053; AVX1: # BB#0: 1054; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 1055; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1056; AVX1-NEXT: retq 1057; 1058; AVX2-LABEL: shuffle_v8i32_08084c4c: 1059; AVX2: # BB#0: 1060; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4] 1061; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5] 1062; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1063; AVX2-NEXT: retq 1064 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12> 1065 ret <8 x i32> %shuffle 1066} 1067 1068define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) { 1069; AVX1-LABEL: shuffle_v8i32_8823cc67: 1070; AVX1: # BB#0: 1071; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7] 1072; AVX1-NEXT: retq 1073; 1074; 
AVX2-LABEL: shuffle_v8i32_8823cc67: 1075; AVX2: # BB#0: 1076; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7] 1077; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1078; AVX2-NEXT: retq 1079 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7> 1080 ret <8 x i32> %shuffle 1081} 1082 1083define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) { 1084; AVX1-LABEL: shuffle_v8i32_9832dc76: 1085; AVX1: # BB#0: 1086; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6] 1087; AVX1-NEXT: retq 1088; 1089; AVX2-LABEL: shuffle_v8i32_9832dc76: 1090; AVX2: # BB#0: 1091; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1092; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1093; AVX2-NEXT: retq 1094 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6> 1095 ret <8 x i32> %shuffle 1096} 1097 1098define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { 1099; AVX1-LABEL: shuffle_v8i32_9810dc54: 1100; AVX1: # BB#0: 1101; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4] 1102; AVX1-NEXT: retq 1103; 1104; AVX2-LABEL: shuffle_v8i32_9810dc54: 1105; AVX2: # BB#0: 1106; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4] 1107; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7] 1108; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] 1109; AVX2-NEXT: retq 1110 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4> 1111 ret <8 x i32> %shuffle 1112} 1113 1114define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { 1115; AVX1-LABEL: shuffle_v8i32_08194c5d: 1116; AVX1: # BB#0: 1117; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1118; AVX1-NEXT: retq 
1119; 1120; AVX2-LABEL: shuffle_v8i32_08194c5d: 1121; AVX2: # BB#0: 1122; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 1123; AVX2-NEXT: retq 1124 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1125 ret <8 x i32> %shuffle 1126} 1127 1128define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { 1129; AVX1-LABEL: shuffle_v8i32_2a3b6e7f: 1130; AVX1: # BB#0: 1131; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1132; AVX1-NEXT: retq 1133; 1134; AVX2-LABEL: shuffle_v8i32_2a3b6e7f: 1135; AVX2: # BB#0: 1136; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 1137; AVX2-NEXT: retq 1138 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1139 ret <8 x i32> %shuffle 1140} 1141 1142define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { 1143; AVX1-LABEL: shuffle_v8i32_08192a3b: 1144; AVX1: # BB#0: 1145; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1146; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1147; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1148; AVX1-NEXT: retq 1149; 1150; AVX2-LABEL: shuffle_v8i32_08192a3b: 1151; AVX2: # BB#0: 1152; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3> 1153; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1154; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1155; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1156; AVX2-NEXT: retq 1157 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1158 ret <8 x i32> %shuffle 1159} 1160 1161define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { 1162; 
AVX1-LABEL: shuffle_v8i32_08991abb: 1163; AVX1: # BB#0: 1164; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0] 1165; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1] 1166; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1167; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3] 1168; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1169; AVX1-NEXT: retq 1170; 1171; AVX2-LABEL: shuffle_v8i32_08991abb: 1172; AVX2: # BB#0: 1173; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1174; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1175; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3> 1176; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1177; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1178; AVX2-NEXT: retq 1179 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11> 1180 ret <8 x i32> %shuffle 1181} 1182 1183define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { 1184; AVX1-LABEL: shuffle_v8i32_091b2d3f: 1185; AVX1: # BB#0: 1186; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3] 1187; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3] 1188; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1189; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1190; AVX1-NEXT: retq 1191; 1192; AVX2-LABEL: shuffle_v8i32_091b2d3f: 1193; AVX2: # BB#0: 1194; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1195; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1196; AVX2-NEXT: retq 1197 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15> 1198 ret <8 x i32> %shuffle 1199} 1200 1201define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { 1202; AVX1-LABEL: shuffle_v8i32_09ab1def: 1203; AVX1: # BB#0: 1204; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 
1205; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1206; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1207; AVX1-NEXT: retq 1208; 1209; AVX2-LABEL: shuffle_v8i32_09ab1def: 1210; AVX2: # BB#0: 1211; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u> 1212; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0 1213; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] 1214; AVX2-NEXT: retq 1215 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15> 1216 ret <8 x i32> %shuffle 1217} 1218 1219define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) { 1220; AVX1-LABEL: shuffle_v8i32_00014445: 1221; AVX1: # BB#0: 1222; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1223; AVX1-NEXT: retq 1224; 1225; AVX2-LABEL: shuffle_v8i32_00014445: 1226; AVX2: # BB#0: 1227; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5] 1228; AVX2-NEXT: retq 1229 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5> 1230 ret <8 x i32> %shuffle 1231} 1232 1233define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) { 1234; AVX1-LABEL: shuffle_v8i32_00204464: 1235; AVX1: # BB#0: 1236; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1237; AVX1-NEXT: retq 1238; 1239; AVX2-LABEL: shuffle_v8i32_00204464: 1240; AVX2: # BB#0: 1241; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4] 1242; AVX2-NEXT: retq 1243 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4> 1244 ret <8 x i32> %shuffle 1245} 1246 1247define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) { 1248; AVX1-LABEL: shuffle_v8i32_03004744: 1249; AVX1: # BB#0: 1250; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1251; AVX1-NEXT: retq 1252; 1253; AVX2-LABEL: shuffle_v8i32_03004744: 1254; AVX2: # BB#0: 1255; AVX2-NEXT: vpshufd 
{{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4] 1256; AVX2-NEXT: retq 1257 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4> 1258 ret <8 x i32> %shuffle 1259} 1260 1261define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) { 1262; AVX1-LABEL: shuffle_v8i32_10005444: 1263; AVX1: # BB#0: 1264; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1265; AVX1-NEXT: retq 1266; 1267; AVX2-LABEL: shuffle_v8i32_10005444: 1268; AVX2: # BB#0: 1269; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4] 1270; AVX2-NEXT: retq 1271 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4> 1272 ret <8 x i32> %shuffle 1273} 1274 1275define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) { 1276; AVX1-LABEL: shuffle_v8i32_22006644: 1277; AVX1: # BB#0: 1278; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1279; AVX1-NEXT: retq 1280; 1281; AVX2-LABEL: shuffle_v8i32_22006644: 1282; AVX2: # BB#0: 1283; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4] 1284; AVX2-NEXT: retq 1285 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4> 1286 ret <8 x i32> %shuffle 1287} 1288 1289define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) { 1290; AVX1-LABEL: shuffle_v8i32_33307774: 1291; AVX1: # BB#0: 1292; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1293; AVX1-NEXT: retq 1294; 1295; AVX2-LABEL: shuffle_v8i32_33307774: 1296; AVX2: # BB#0: 1297; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4] 1298; AVX2-NEXT: retq 1299 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4> 1300 ret <8 x i32> %shuffle 1301} 1302 1303define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { 1304; AVX1-LABEL: shuffle_v8i32_32107654: 1305; AVX1: # BB#0: 1306; 
AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1307; AVX1-NEXT: retq 1308; 1309; AVX2-LABEL: shuffle_v8i32_32107654: 1310; AVX2: # BB#0: 1311; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1312; AVX2-NEXT: retq 1313 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1314 ret <8 x i32> %shuffle 1315} 1316 1317define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { 1318; AVX1-LABEL: shuffle_v8i32_00234467: 1319; AVX1: # BB#0: 1320; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1321; AVX1-NEXT: retq 1322; 1323; AVX2-LABEL: shuffle_v8i32_00234467: 1324; AVX2: # BB#0: 1325; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7] 1326; AVX2-NEXT: retq 1327 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7> 1328 ret <8 x i32> %shuffle 1329} 1330 1331define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { 1332; AVX1-LABEL: shuffle_v8i32_00224466: 1333; AVX1: # BB#0: 1334; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1335; AVX1-NEXT: retq 1336; 1337; AVX2-LABEL: shuffle_v8i32_00224466: 1338; AVX2: # BB#0: 1339; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1340; AVX2-NEXT: retq 1341 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1342 ret <8 x i32> %shuffle 1343} 1344 1345define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { 1346; AVX1-LABEL: shuffle_v8i32_10325476: 1347; AVX1: # BB#0: 1348; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1349; AVX1-NEXT: retq 1350; 1351; AVX2-LABEL: shuffle_v8i32_10325476: 1352; AVX2: # BB#0: 1353; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] 1354; AVX2-NEXT: retq 1355 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 1356 ret <8 x 
i32> %shuffle 1357} 1358 1359define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { 1360; AVX1-LABEL: shuffle_v8i32_11335577: 1361; AVX1: # BB#0: 1362; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1363; AVX1-NEXT: retq 1364; 1365; AVX2-LABEL: shuffle_v8i32_11335577: 1366; AVX2: # BB#0: 1367; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1368; AVX2-NEXT: retq 1369 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1370 ret <8 x i32> %shuffle 1371} 1372 1373define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { 1374; AVX1-LABEL: shuffle_v8i32_10235467: 1375; AVX1: # BB#0: 1376; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1377; AVX1-NEXT: retq 1378; 1379; AVX2-LABEL: shuffle_v8i32_10235467: 1380; AVX2: # BB#0: 1381; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7] 1382; AVX2-NEXT: retq 1383 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1384 ret <8 x i32> %shuffle 1385} 1386 1387define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { 1388; AVX1-LABEL: shuffle_v8i32_10225466: 1389; AVX1: # BB#0: 1390; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1391; AVX1-NEXT: retq 1392; 1393; AVX2-LABEL: shuffle_v8i32_10225466: 1394; AVX2: # BB#0: 1395; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6] 1396; AVX2-NEXT: retq 1397 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6> 1398 ret <8 x i32> %shuffle 1399} 1400 1401define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { 1402; AVX1-LABEL: shuffle_v8i32_00015444: 1403; AVX1: # BB#0: 1404; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4] 1405; AVX1-NEXT: retq 1406; 1407; AVX2-LABEL: shuffle_v8i32_00015444: 1408; AVX2: # BB#0: 1409; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] 
1410; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1411; AVX2-NEXT: retq 1412 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> 1413 ret <8 x i32> %shuffle 1414} 1415 1416define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { 1417; AVX1-LABEL: shuffle_v8i32_00204644: 1418; AVX1: # BB#0: 1419; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4] 1420; AVX1-NEXT: retq 1421; 1422; AVX2-LABEL: shuffle_v8i32_00204644: 1423; AVX2: # BB#0: 1424; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] 1425; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1426; AVX2-NEXT: retq 1427 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> 1428 ret <8 x i32> %shuffle 1429} 1430 1431define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { 1432; AVX1-LABEL: shuffle_v8i32_03004474: 1433; AVX1: # BB#0: 1434; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4] 1435; AVX1-NEXT: retq 1436; 1437; AVX2-LABEL: shuffle_v8i32_03004474: 1438; AVX2: # BB#0: 1439; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] 1440; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1441; AVX2-NEXT: retq 1442 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> 1443 ret <8 x i32> %shuffle 1444} 1445 1446define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { 1447; AVX1-LABEL: shuffle_v8i32_10004444: 1448; AVX1: # BB#0: 1449; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4] 1450; AVX1-NEXT: retq 1451; 1452; AVX2-LABEL: shuffle_v8i32_10004444: 1453; AVX2: # BB#0: 1454; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] 1455; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1456; AVX2-NEXT: retq 1457 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> 1458 ret <8 x i32> %shuffle 1459} 1460 1461define <8 x i32> 
@shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { 1462; AVX1-LABEL: shuffle_v8i32_22006446: 1463; AVX1: # BB#0: 1464; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6] 1465; AVX1-NEXT: retq 1466; 1467; AVX2-LABEL: shuffle_v8i32_22006446: 1468; AVX2: # BB#0: 1469; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] 1470; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1471; AVX2-NEXT: retq 1472 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> 1473 ret <8 x i32> %shuffle 1474} 1475 1476define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { 1477; AVX1-LABEL: shuffle_v8i32_33307474: 1478; AVX1: # BB#0: 1479; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4] 1480; AVX1-NEXT: retq 1481; 1482; AVX2-LABEL: shuffle_v8i32_33307474: 1483; AVX2: # BB#0: 1484; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4] 1485; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1486; AVX2-NEXT: retq 1487 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> 1488 ret <8 x i32> %shuffle 1489} 1490 1491define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { 1492; AVX1-LABEL: shuffle_v8i32_32104567: 1493; AVX1: # BB#0: 1494; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7] 1495; AVX1-NEXT: retq 1496; 1497; AVX2-LABEL: shuffle_v8i32_32104567: 1498; AVX2: # BB#0: 1499; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] 1500; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1501; AVX2-NEXT: retq 1502 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 1503 ret <8 x i32> %shuffle 1504} 1505 1506define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { 1507; AVX1-LABEL: shuffle_v8i32_00236744: 1508; AVX1: # BB#0: 1509; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4] 1510; AVX1-NEXT: retq 1511; 1512; AVX2-LABEL: shuffle_v8i32_00236744: 1513; 
AVX2: # BB#0: 1514; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] 1515; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1516; AVX2-NEXT: retq 1517 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> 1518 ret <8 x i32> %shuffle 1519} 1520 1521define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { 1522; AVX1-LABEL: shuffle_v8i32_00226644: 1523; AVX1: # BB#0: 1524; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4] 1525; AVX1-NEXT: retq 1526; 1527; AVX2-LABEL: shuffle_v8i32_00226644: 1528; AVX2: # BB#0: 1529; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] 1530; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1531; AVX2-NEXT: retq 1532 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> 1533 ret <8 x i32> %shuffle 1534} 1535 1536define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { 1537; AVX1-LABEL: shuffle_v8i32_10324567: 1538; AVX1: # BB#0: 1539; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7] 1540; AVX1-NEXT: retq 1541; 1542; AVX2-LABEL: shuffle_v8i32_10324567: 1543; AVX2: # BB#0: 1544; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] 1545; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1546; AVX2-NEXT: retq 1547 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 1548 ret <8 x i32> %shuffle 1549} 1550 1551define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { 1552; AVX1-LABEL: shuffle_v8i32_11334567: 1553; AVX1: # BB#0: 1554; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7] 1555; AVX1-NEXT: retq 1556; 1557; AVX2-LABEL: shuffle_v8i32_11334567: 1558; AVX2: # BB#0: 1559; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] 1560; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1561; AVX2-NEXT: retq 1562 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, 
i32 7> 1563 ret <8 x i32> %shuffle 1564} 1565 1566define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { 1567; AVX1-LABEL: shuffle_v8i32_01235467: 1568; AVX1: # BB#0: 1569; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7] 1570; AVX1-NEXT: retq 1571; 1572; AVX2-LABEL: shuffle_v8i32_01235467: 1573; AVX2: # BB#0: 1574; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] 1575; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1576; AVX2-NEXT: retq 1577 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> 1578 ret <8 x i32> %shuffle 1579} 1580 1581define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { 1582; AVX1-LABEL: shuffle_v8i32_01235466: 1583; AVX1: # BB#0: 1584; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6] 1585; AVX1-NEXT: retq 1586; 1587; AVX2-LABEL: shuffle_v8i32_01235466: 1588; AVX2: # BB#0: 1589; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] 1590; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1591; AVX2-NEXT: retq 1592 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> 1593 ret <8 x i32> %shuffle 1594} 1595 1596define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { 1597; AVX1-LABEL: shuffle_v8i32_002u6u44: 1598; AVX1: # BB#0: 1599; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4] 1600; AVX1-NEXT: retq 1601; 1602; AVX2-LABEL: shuffle_v8i32_002u6u44: 1603; AVX2: # BB#0: 1604; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4> 1605; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1606; AVX2-NEXT: retq 1607 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> 1608 ret <8 x i32> %shuffle 1609} 1610 1611define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { 1612; AVX1-LABEL: shuffle_v8i32_00uu66uu: 1613; AVX1: # BB#0: 1614; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u] 
1615; AVX1-NEXT: retq 1616; 1617; AVX2-LABEL: shuffle_v8i32_00uu66uu: 1618; AVX2: # BB#0: 1619; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> 1620; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1621; AVX2-NEXT: retq 1622 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> 1623 ret <8 x i32> %shuffle 1624} 1625 1626define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { 1627; AVX1-LABEL: shuffle_v8i32_103245uu: 1628; AVX1: # BB#0: 1629; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u] 1630; AVX1-NEXT: retq 1631; 1632; AVX2-LABEL: shuffle_v8i32_103245uu: 1633; AVX2: # BB#0: 1634; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> 1635; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1636; AVX2-NEXT: retq 1637 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> 1638 ret <8 x i32> %shuffle 1639} 1640 1641define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { 1642; AVX1-LABEL: shuffle_v8i32_1133uu67: 1643; AVX1: # BB#0: 1644; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7] 1645; AVX1-NEXT: retq 1646; 1647; AVX2-LABEL: shuffle_v8i32_1133uu67: 1648; AVX2: # BB#0: 1649; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> 1650; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1651; AVX2-NEXT: retq 1652 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> 1653 ret <8 x i32> %shuffle 1654} 1655 1656define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { 1657; AVX1-LABEL: shuffle_v8i32_0uu354uu: 1658; AVX1: # BB#0: 1659; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u] 1660; AVX1-NEXT: retq 1661; 1662; AVX2-LABEL: shuffle_v8i32_0uu354uu: 1663; AVX2: # BB#0: 1664; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> 1665; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1666; AVX2-NEXT: retq 1667 
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> 1668 ret <8 x i32> %shuffle 1669} 1670 1671define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { 1672; AVX1-LABEL: shuffle_v8i32_uuu3uu66: 1673; AVX1: # BB#0: 1674; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6] 1675; AVX1-NEXT: retq 1676; 1677; AVX2-LABEL: shuffle_v8i32_uuu3uu66: 1678; AVX2: # BB#0: 1679; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6> 1680; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1681; AVX2-NEXT: retq 1682 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> 1683 ret <8 x i32> %shuffle 1684} 1685 1686define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { 1687; AVX1-LABEL: shuffle_v8i32_6caa87e5: 1688; AVX1: # BB#0: 1689; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1] 1690; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6] 1691; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1692; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1693; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1694; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1695; AVX1-NEXT: retq 1696; 1697; AVX2-LABEL: shuffle_v8i32_6caa87e5: 1698; AVX2: # BB#0: 1699; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u> 1700; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 1701; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2] 1702; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7] 1703; AVX2-NEXT: retq 1704 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5> 1705 ret <8 x i32> %shuffle 1706} 1707 1708define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) { 1709; AVX1-LABEL: shuffle_v8i32_32103210: 1710; AVX1: # BB#0: 1711; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = 
xmm0[3,2,1,0] 1712; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1713; AVX1-NEXT: retq 1714; 1715; AVX2-LABEL: shuffle_v8i32_32103210: 1716; AVX2: # BB#0: 1717; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0] 1718; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1719; AVX2-NEXT: retq 1720 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> 1721 ret <8 x i32> %shuffle 1722} 1723 1724define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { 1725; AVX1-LABEL: shuffle_v8i32_76547654: 1726; AVX1: # BB#0: 1727; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1728; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1729; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1730; AVX1-NEXT: retq 1731; 1732; AVX2-LABEL: shuffle_v8i32_76547654: 1733; AVX2: # BB#0: 1734; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4] 1735; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1736; AVX2-NEXT: retq 1737 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4> 1738 ret <8 x i32> %shuffle 1739} 1740 1741define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { 1742; AVX1-LABEL: shuffle_v8i32_76543210: 1743; AVX1: # BB#0: 1744; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 1745; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1746; AVX1-NEXT: retq 1747; 1748; AVX2-LABEL: shuffle_v8i32_76543210: 1749; AVX2: # BB#0: 1750; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0] 1751; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 1752; AVX2-NEXT: retq 1753 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 1754 ret <8 x i32> %shuffle 1755} 1756 1757define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { 1758; AVX1-LABEL: shuffle_v8i32_3210ba98: 1759; AVX1: # BB#0: 1760; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1761; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = 
ymm0[3,2,1,0,7,6,5,4] 1762; AVX1-NEXT: retq 1763; 1764; AVX2-LABEL: shuffle_v8i32_3210ba98: 1765; AVX2: # BB#0: 1766; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1767; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1768; AVX2-NEXT: retq 1769 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> 1770 ret <8 x i32> %shuffle 1771} 1772 1773define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { 1774; AVX1-LABEL: shuffle_v8i32_3210fedc: 1775; AVX1: # BB#0: 1776; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 1777; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1778; AVX1-NEXT: retq 1779; 1780; AVX2-LABEL: shuffle_v8i32_3210fedc: 1781; AVX2: # BB#0: 1782; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1783; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1784; AVX2-NEXT: retq 1785 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> 1786 ret <8 x i32> %shuffle 1787} 1788 1789define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { 1790; AVX1-LABEL: shuffle_v8i32_7654fedc: 1791; AVX1: # BB#0: 1792; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1793; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1794; AVX1-NEXT: retq 1795; 1796; AVX2-LABEL: shuffle_v8i32_7654fedc: 1797; AVX2: # BB#0: 1798; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1799; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1800; AVX2-NEXT: retq 1801 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> 1802 ret <8 x i32> %shuffle 1803} 1804 1805define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { 1806; AVX1-LABEL: shuffle_v8i32_fedc7654: 1807; AVX1: # BB#0: 1808; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 1809; AVX1-NEXT: vpermilps 
{{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1810; AVX1-NEXT: retq 1811; 1812; AVX2-LABEL: shuffle_v8i32_fedc7654: 1813; AVX2: # BB#0: 1814; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] 1815; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1816; AVX2-NEXT: retq 1817 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> 1818 ret <8 x i32> %shuffle 1819} 1820 1821define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) { 1822; AVX1-LABEL: shuffle_v8i32_ba987654: 1823; AVX1: # BB#0: 1824; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 1825; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1826; AVX1-NEXT: retq 1827; 1828; AVX2-LABEL: shuffle_v8i32_ba987654: 1829; AVX2: # BB#0: 1830; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1831; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1832; AVX2-NEXT: retq 1833 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> 1834 ret <8 x i32> %shuffle 1835} 1836 1837define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) { ; low lane = b[3..0], high lane = a[3..0]; mask must match the test name 1838; AVX1-LABEL: shuffle_v8i32_ba983210: 1839; AVX1: # BB#0: 1840; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1841; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1842; AVX1-NEXT: retq 1843; 1844; AVX2-LABEL: shuffle_v8i32_ba983210: 1845; AVX2: # BB#0: 1846; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1847; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1848; AVX2-NEXT: retq 1849 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> 1850 ret <8 x i32> %shuffle 1851} 1852 1853define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) { 1854; AVX1-LABEL: shuffle_v8i32_zuu8zuuc: 1855; AVX1: # BB#0: 1856; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1857; AVX1-NEXT: vshufps {{.*#+}} 
ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4] 1858; AVX1-NEXT: retq 1859; 1860; AVX2-LABEL: shuffle_v8i32_zuu8zuuc: 1861; AVX2: # BB#0: 1862; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19] 1863; AVX2-NEXT: retq 1864 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12> 1865 ret <8 x i32> %shuffle 1866} 1867 1868define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) { 1869; AVX1-LABEL: shuffle_v8i32_9ubzdefz: 1870; AVX1: # BB#0: 1871; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1872; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4] 1873; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 1874; AVX1-NEXT: retq 1875; 1876; AVX2-LABEL: shuffle_v8i32_9ubzdefz: 1877; AVX2: # BB#0: 1878; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero 1879; AVX2-NEXT: retq 1880 %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0> 1881 ret <8 x i32> %shuffle 1882} 1883 1884define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { 1885; AVX1-LABEL: shuffle_v8i32_80u1b4uu: 1886; AVX1: # BB#0: 1887; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1888; AVX1-NEXT: retq 1889; 1890; AVX2-LABEL: shuffle_v8i32_80u1b4uu: 1891; AVX2: # BB#0: 1892; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] 1893; AVX2-NEXT: retq 1894 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef> 1895 ret <8 x i32> %shuffle 1896} 1897 1898define <8 x float> 
@splat_mem_v8f32_2(float* %p) { 1899; ALL-LABEL: splat_mem_v8f32_2: 1900; ALL: # BB#0: 1901; ALL-NEXT: vbroadcastss (%rdi), %ymm0 1902; ALL-NEXT: retq 1903 %1 = load float, float* %p 1904 %2 = insertelement <4 x float> undef, float %1, i32 0 1905 %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer 1906 ret <8 x float> %3 1907} 1908 1909define <8 x float> @splat_v8f32(<4 x float> %r) { 1910; AVX1-LABEL: splat_v8f32: 1911; AVX1: # BB#0: 1912; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 1913; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1914; AVX1-NEXT: retq 1915; 1916; AVX2-LABEL: splat_v8f32: 1917; AVX2: # BB#0: 1918; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 1919; AVX2-NEXT: retq 1920 %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer 1921 ret <8 x float> %1 1922} 1923 1924; 1925; Shuffle to logical bit shifts 1926; 1927 1928define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) { 1929; AVX1-LABEL: shuffle_v8i32_z0U2zUz6: 1930; AVX1: # BB#0: 1931; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1932; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 1933; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5] 1934; AVX1-NEXT: retq 1935; 1936; AVX2-LABEL: shuffle_v8i32_z0U2zUz6: 1937; AVX2: # BB#0: 1938; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0 1939; AVX2-NEXT: retq 1940 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6> 1941 ret <8 x i32> %shuffle 1942} 1943 1944define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) { 1945; AVX1-LABEL: shuffle_v8i32_1U3z5zUU: 1946; AVX1: # BB#0: 1947; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1948; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 1949; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 1950; AVX1-NEXT: retq 1951; 1952; AVX2-LABEL: shuffle_v8i32_1U3z5zUU: 1953; AVX2: # BB#0: 1954; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 1955; 
AVX2-NEXT: retq 1956 %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef> 1957 ret <8 x i32> %shuffle 1958} 1959 1960define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) { 1961; AVX1-LABEL: shuffle_v8i32_B012F456: 1962; AVX1: # BB#0: 1963; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4] 1964; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6] 1965; AVX1-NEXT: retq 1966; 1967; AVX2-LABEL: shuffle_v8i32_B012F456: 1968; AVX2: # BB#0: 1969; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27] 1970; AVX2-NEXT: retq 1971 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6> 1972 ret <8 x i32> %shuffle 1973} 1974 1975define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) { 1976; AVX1-LABEL: shuffle_v8i32_1238567C: 1977; AVX1: # BB#0: 1978; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4] 1979; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4] 1980; AVX1-NEXT: retq 1981; 1982; AVX2-LABEL: shuffle_v8i32_1238567C: 1983; AVX2: # BB#0: 1984; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19] 1985; AVX2-NEXT: retq 1986 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12> 1987 ret <8 x i32> %shuffle 1988} 1989 1990define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) { 1991; AVX1-LABEL: shuffle_v8i32_9AB0DEF4: 1992; AVX1: # BB#0: 1993; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4] 1994; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4] 1995; AVX1-NEXT: retq 1996; 1997; 
AVX2-LABEL: shuffle_v8i32_9AB0DEF4: 1998; AVX2: # BB#0: 1999; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19] 2000; AVX2-NEXT: retq 2001 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4> 2002 ret <8 x i32> %shuffle 2003} 2004 2005define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) { 2006; AVX1-LABEL: shuffle_v8i32_389A7CDE: 2007; AVX1: # BB#0: 2008; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4] 2009; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6] 2010; AVX1-NEXT: retq 2011; 2012; AVX2-LABEL: shuffle_v8i32_389A7CDE: 2013; AVX2: # BB#0: 2014; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27] 2015; AVX2-NEXT: retq 2016 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14> 2017 ret <8 x i32> %shuffle 2018} 2019 2020define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) { 2021; AVX1-LABEL: shuffle_v8i32_30127456: 2022; AVX1: # BB#0: 2023; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2024; AVX1-NEXT: retq 2025; 2026; AVX2-LABEL: shuffle_v8i32_30127456: 2027; AVX2: # BB#0: 2028; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6] 2029; AVX2-NEXT: retq 2030 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6> 2031 ret <8 x i32> %shuffle 2032} 2033 2034define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) { 2035; AVX1-LABEL: shuffle_v8i32_12305674: 2036; AVX1: # BB#0: 2037; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4] 2038; AVX1-NEXT: retq 2039; 2040; AVX2-LABEL: shuffle_v8i32_12305674: 2041; AVX2: # BB#0: 2042; AVX2-NEXT: vpshufd {{.*#+}} 
ymm0 = ymm0[1,2,3,0,5,6,7,4] 2043; AVX2-NEXT: retq 2044 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4> 2045 ret <8 x i32> %shuffle 2046} 2047 2048define <8 x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2049; ALL-LABEL: concat_v2f32_1: 2050; ALL: # BB#0: # %entry 2051; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2052; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2053; ALL-NEXT: retq 2054entry: 2055 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2056 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2057 %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2058 %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2059 %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef> 2060 ret <8 x float> %tmp76 2061} 2062 2063define <8 x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2064; ALL-LABEL: concat_v2f32_2: 2065; ALL: # BB#0: # %entry 2066; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2067; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2068; ALL-NEXT: retq 2069entry: 2070 %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8 2071 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2072 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2073 ret <8 x float> %tmp76 2074} 2075 2076define <8 x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) { 2077; ALL-LABEL: concat_v2f32_3: 2078; ALL: # BB#0: # %entry 2079; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 2080; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 2081; ALL-NEXT: retq 2082entry: 2083 %tmp74 = load <2 x float>, <2 
x float>* %tmp65, align 8 2084 %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8 2085 %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2086 %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2087 ret <8 x float> %res 2088} 2089 2090define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) { 2091; AVX1-LABEL: insert_mem_and_zero_v8i32: 2092; AVX1: # BB#0: 2093; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2094; AVX1-NEXT: retq 2095; 2096; AVX2-LABEL: insert_mem_and_zero_v8i32: 2097; AVX2: # BB#0: 2098; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2099; AVX2-NEXT: retq 2100 %a = load i32, i32* %ptr 2101 %v = insertelement <8 x i32> undef, i32 %a, i32 0 2102 %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2103 ret <8 x i32> %shuffle 2104} 2105 2106