; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

; Codegen checks for two-element 128-bit shuffles (<2 x i64> and <2 x double>)
; on x86-64, run once per feature level listed in the llc invocations above.
;
; Naming convention used by the functions in this file:
;   * The trailing digits of a function name are its shufflevector mask:
;     indices 0-1 select from %a, indices 2-3 select from %b.
;   * A 'z' in the name marks a lane taken from a zeroinitializer operand.
;   * '_copy' variants take a leading, unused %nonce argument, so the checked
;     assembly reads its inputs from xmm1/xmm2 instead of xmm0/xmm1.
;   * 'insert_reg_*' / 'insert_mem_*' build the first shuffle operand with an
;     insertelement of a scalar argument or of a loaded scalar respectively.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_00:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_00:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_00:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_10:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_10:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_11:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_11:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_22:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_22:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_22:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_32:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_32:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_33:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_33:
; AVX:       # BB#0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_00:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3:       # BB#0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE:       # BB#0:
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_10:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_11:
; AVX:       # BB#0:
; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3:       # BB#0:
; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41:       # BB#0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX:       # BB#0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE:       # BB#0:
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:
; SSE:       # BB#0:
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_33:
; AVX:       # BB#0:
; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_03:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_03:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_21:
; SSE41:       # BB#0:
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_21:
; AVX:       # BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}


define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_03:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_03:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_03_copy:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_03_copy:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_12_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_12_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_21:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_21:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    movapd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_21_copy:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_21_copy:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT:    movapd %xmm1, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2:       # BB#0:
; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3:       # BB#0:
; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT:    movapd %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_30_copy:
; SSE41:       # BB#0:
; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_30_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE:       # BB#0:
; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX:       # BB#0:
; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_1z:
; SSE:       # BB#0:
; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_1z:
; AVX:       # BB#0:
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_z0:
; SSE:       # BB#0:
; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2i64_z0:
; AVX:       # BB#0:
; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %shuffle
}

define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuffle_v2i64_z1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v2i64_z1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %shuffle
}

define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_1z:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z0:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2:       # BB#0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3:       # BB#0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
; SSE41:       # BB#0:
; SSE41-NEXT:    xorpd %xmm1, %xmm1
; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_z1:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT:    retq
  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE:       # BB#0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuffle_v2f64_bitcast_1z:
; AVX:       # BB#0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX-NEXT:    retq
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movd %rdi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq %rdi, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %rdi, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %rdi, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd %rdi, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    movd %rdi, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_reg_lo_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq %rdi, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_reg_lo_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
  %v = insertelement <2 x i64> undef, i64 %a, i32 0
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %shuffle
}

define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE2-LABEL: insert_mem_lo_v2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movlpd (%rdi), 
%xmm0 939; SSE2-NEXT: retq 940; 941; SSE3-LABEL: insert_mem_lo_v2i64: 942; SSE3: # BB#0: 943; SSE3-NEXT: movlpd (%rdi), %xmm0 944; SSE3-NEXT: retq 945; 946; SSSE3-LABEL: insert_mem_lo_v2i64: 947; SSSE3: # BB#0: 948; SSSE3-NEXT: movlpd (%rdi), %xmm0 949; SSSE3-NEXT: retq 950; 951; SSE41-LABEL: insert_mem_lo_v2i64: 952; SSE41: # BB#0: 953; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 954; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 955; SSE41-NEXT: retq 956; 957; AVX1-LABEL: insert_mem_lo_v2i64: 958; AVX1: # BB#0: 959; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 960; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] 961; AVX1-NEXT: retq 962; 963; AVX2-LABEL: insert_mem_lo_v2i64: 964; AVX2: # BB#0: 965; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 966; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] 967; AVX2-NEXT: retq 968 %a = load i64, i64* %ptr 969 %v = insertelement <2 x i64> undef, i64 %a, i32 0 970 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3> 971 ret <2 x i64> %shuffle 972} 973 974define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) { 975; SSE-LABEL: insert_reg_hi_v2i64: 976; SSE: # BB#0: 977; SSE-NEXT: movd %rdi, %xmm1 978; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 979; SSE-NEXT: retq 980; 981; AVX-LABEL: insert_reg_hi_v2i64: 982; AVX: # BB#0: 983; AVX-NEXT: vmovq %rdi, %xmm1 984; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 985; AVX-NEXT: retq 986 %v = insertelement <2 x i64> undef, i64 %a, i32 0 987 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0> 988 ret <2 x i64> %shuffle 989} 990 991define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { 992; SSE-LABEL: insert_mem_hi_v2i64: 993; SSE: # BB#0: 994; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 995; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 996; SSE-NEXT: retq 997; 998; AVX-LABEL: insert_mem_hi_v2i64: 999; AVX: # BB#0: 1000; AVX-NEXT: vmovq {{.*#+}} xmm1 = 
mem[0],zero 1001; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1002; AVX-NEXT: retq 1003 %a = load i64, i64* %ptr 1004 %v = insertelement <2 x i64> undef, i64 %a, i32 0 1005 %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0> 1006 ret <2 x i64> %shuffle 1007} 1008 1009define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) { 1010; SSE-LABEL: insert_reg_lo_v2f64: 1011; SSE: # BB#0: 1012; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1013; SSE-NEXT: movapd %xmm1, %xmm0 1014; SSE-NEXT: retq 1015; 1016; AVX-LABEL: insert_reg_lo_v2f64: 1017; AVX: # BB#0: 1018; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] 1019; AVX-NEXT: retq 1020 %v = insertelement <2 x double> undef, double %a, i32 0 1021 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1022 ret <2 x double> %shuffle 1023} 1024 1025define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { 1026; SSE-LABEL: insert_mem_lo_v2f64: 1027; SSE: # BB#0: 1028; SSE-NEXT: movlpd (%rdi), %xmm0 1029; SSE-NEXT: retq 1030; 1031; AVX-LABEL: insert_mem_lo_v2f64: 1032; AVX: # BB#0: 1033; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 1034; AVX-NEXT: retq 1035 %a = load double, double* %ptr 1036 %v = insertelement <2 x double> undef, double %a, i32 0 1037 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3> 1038 ret <2 x double> %shuffle 1039} 1040 1041define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { 1042; SSE-LABEL: insert_reg_hi_v2f64: 1043; SSE: # BB#0: 1044; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 1045; SSE-NEXT: movapd %xmm1, %xmm0 1046; SSE-NEXT: retq 1047; 1048; AVX-LABEL: insert_reg_hi_v2f64: 1049; AVX: # BB#0: 1050; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1051; AVX-NEXT: retq 1052 %v = insertelement <2 x double> undef, double %a, i32 0 1053 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> 1054 ret <2 x double> %shuffle 1055} 
; Loads a scalar double from %ptr, places it in element 0 of an otherwise
; undef vector, then shuffles with mask <2, 0>: result element 0 is %b[0] and
; result element 1 is the loaded value. The checks below expect the load and
; the insertion into the high element to be a single movhpd (vmovhpd with AVX)
; reading from (%rdi).
define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; SSE-LABEL: insert_mem_hi_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movhpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

; Splats the scalar double argument %a into both elements of the result: it is
; inserted into element 0 of an undef vector and then shuffled with mask <0, 0>.
define <2 x double> @insert_dup_reg_v2f64(double %a) {
; The checks below expect movddup for the SSE3, SSSE3 and SSE4.1 runs and
; vmovddup for the AVX runs; only the plain SSE2 run expects movlhps.
;
; SSE2-LABEL: insert_dup_reg_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_reg_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_reg_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_reg_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_reg_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

; Same <0, 0> splat as insert_dup_reg_v2f64, but the scalar is loaded from
; %ptr. The checks below expect the SSE2 run to load with movsd and then
; duplicate with movlhps, while the SSE3, SSSE3, SSE4.1 and AVX runs expect a
; single (v)movddup that takes its operand from memory (mem[0,0]).
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; SSE2-LABEL: insert_dup_mem_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_mem_v2f64:
; SSE3: # BB#0:
; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v2f64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v2f64:
; SSE41: # BB#0:
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_mem_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

; Loads a whole <2 x double> from %ptr and swaps its two elements (mask
; <1, 0>). The checks below expect the SSE runs to load with movapd and then
; shuffle with shufpd, while the AVX runs expect one vpermilpd whose source
; is the memory operand (mem[1,0]).
define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v2f64_10:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq
  %a = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %shuffle
}