; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

; Verify that we don't emit packed vector shift instructions if the
; condition used by the vector select is a vector of constants.

define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test4:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test5:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test5:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

; Both select operands are %a, so the select folds away entirely (plain retq).
define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test6:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test6:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
  ret <8 x i16> %1
}

define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test7:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test8:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test9:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test9:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test10:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test10:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test11:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,0,0,65535,65535,0]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test11:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test11:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3,4],xmm0[5,6],xmm1[7]
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test12:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test12:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test13:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test13:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test14:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test14:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test15:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: test15:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: test17:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test17:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}

define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test18:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test18:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test18:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: test19:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test19:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test19:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %1
}

define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test20:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test20:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test20:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
  ret <2 x double> %1
}

define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test21:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test21:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test21:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %1
}

define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test22:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test22:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test22:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %1
}

define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: test23:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test23:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test23:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
  %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %1
}

define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test24:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test24:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test24:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
  ret <2 x double> %1
}

define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test25:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test25:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test25:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
  %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %1
}

define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
; SSE-LABEL: select_of_shuffles_0:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    subps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: select_of_shuffles_0:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
  %4 = shufflevector <2 x float> %b0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %5 = shufflevector <2 x float> %b1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %6 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %5, <4 x float> %4
  %7 = fsub <4 x float> %3, %6
  ret <4 x float> %7
}

; PR20677
define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
; SSE-LABEL: select_illegal:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT:    movaps %xmm7, 112(%rdi)
; SSE-NEXT:    movaps %xmm6, 96(%rdi)
; SSE-NEXT:    movaps %xmm5, 80(%rdi)
; SSE-NEXT:    movaps %xmm4, 64(%rdi)
; SSE-NEXT:    movaps %xmm3, 48(%rdi)
; SSE-NEXT:    movaps %xmm2, 32(%rdi)
; SSE-NEXT:    movaps %xmm1, 16(%rdi)
; SSE-NEXT:    movaps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: select_illegal:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %ymm7, %ymm3
; AVX-NEXT:    vmovaps %ymm6, %ymm2
; AVX-NEXT:    retq
  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
  ret <16 x double> %sel
}

; Make sure we can optimize the condition MSB when it is used by 2 selects.
; The v2i1 here will be passed as v2i64 and we will emit a sign_extend_inreg to fill the upper bits.
; We should be able to remove the sra from the sign_extend_inreg to leave only shl.
define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_2uses:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllq $63, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pandn %xmm2, %xmm5
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm5
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    paddq %xmm5, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: shrunkblend_2uses:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psllq $63, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
; SSE41-NEXT:    paddq %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shrunkblend_2uses:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}

; Similar to above, but condition has a use that isn't a condition of a vselect so we can't optimize.
define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
; SSE2-LABEL: shrunkblend_nonvselectuse:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psllq $63, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    movdqa %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    paddq %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: shrunkblend_nonvselectuse:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psllq $63, %xmm0
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    paddq %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: shrunkblend_nonvselectuse:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = sext <2 x i1> %cond to <2 x i64>
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}

; This turns into a SHRUNKBLEND with SSE4 or later, and via
; late shuffle magic, both sides of the blend are the same
; value. If that is not simplified before isel, it can fail
; to match (crash).

define <2 x i32> @simplify_select(i32 %x, <2 x i1> %z) {
; SSE2-LABEL: simplify_select:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    movd %edi, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[1,1]
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm1, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: simplify_select:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT:    pslld $31, %xmm0
; SSE41-NEXT:    movd %edi, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
; SSE41-NEXT:    por %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
; SSE41-NEXT:    pinsrd $1, %edi, %xmm1
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movaps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: simplify_select:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovd %edi, %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,0,1,1]
; AVX-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; AVX-NEXT:    vpinsrd $1, %edi, %xmm2, %xmm2
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %a = insertelement <2 x i32> <i32 0, i32 undef>, i32 %x, i32 1
  %b = insertelement <2 x i32> <i32 undef, i32 0>, i32 %x, i32 0
  %y = or <2 x i32> %a, %b
  %p16 = extractelement <2 x i32> %y, i32 1
  %p17 = insertelement <2 x i32> undef, i32 %p16, i32 0
  %p18 = insertelement <2 x i32> %p17, i32 %x, i32 1
  %r = select <2 x i1> %z, <2 x i32> %y, <2 x i32> %p18
  ret <2 x i32> %r
}

; Test to make sure we don't try to insert a new setcc to swap the operands
; of select with all zeros LHS if the setcc has additional users.
define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>* %p1, <4 x i32>* %p2) {
; SSE-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1,2,4,8]
; SSE-NEXT:    pand %xmm3, %xmm0
; SSE-NEXT:    pcmpeqd %xmm3, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    pandn %xmm1, %xmm3
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, (%rdi)
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: vselect_allzeros_LHS_multiple_use_setcc:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2,4,8]
; AVX-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
; AVX-NEXT:    vmovdqa %xmm0, (%rsi)
; AVX-NEXT:    retq
  %and = and <4 x i32> %x, <i32 1, i32 2, i32 4, i32 8>
  %cond = icmp ne <4 x i32> %and, zeroinitializer
  %sel1 = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %y
  %sel2 = select <4 x i1> %cond, <4 x i32> %z, <4 x i32> zeroinitializer
  store <4 x i32> %sel1, <4 x i32>* %p1
  store <4 x i32> %sel2, <4 x i32>* %p2
  ret void
}

; This test case previously crashed after r363802, r363850, and r363856 due
; to any_extend_vector_inreg not being handled by the X86 backend.
define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    andl $1, %eax
; SSE-NEXT:    shlq $15, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    andl $1, %eax
; AVX-NEXT:    shlq $15, %rax
; AVX-NEXT:    retq
0:
  %1 = load <8 x i8>, <8 x i8>* %x
  %2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
  %3 = select <8 x i1> %2, <8 x i64> <i64 32768, i64 16384, i64 8192, i64 4096, i64 2048, i64 1024, i64 512, i64 256>, <8 x i64> zeroinitializer
  %4 = extractelement <8 x i64> %3, i32 0
  ret i64 %4
}