; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX

; Tests that SSE4.1 blend intrinsics with constant immediate masks fold away
; during lowering instead of emitting a blend instruction.

; Immediate mask 0 selects every element from the first operand, so the
; blend folds to %a0 and only the return remains.
define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blend_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 0)
  ret <2 x double> %1
}

; Same zero-mask fold for the float variant.
define <4 x float> @test_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blend_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 0)
  ret <4 x float> %1
}

; Same zero-mask fold for the i16 variant.
define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblend_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 0)
  ret <8 x i16> %1
}

; An all-ones immediate mask (-1) selects every element from the second
; operand, so the blend folds to a plain register move of %a1.
define <2 x double> @test2_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test2_x86_sse41_blend_pd:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_x86_sse41_blend_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 -1)
  ret <2 x double> %1
}

; Same all-ones-mask fold for the float variant.
define <4 x float> @test2_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test2_x86_sse41_blend_ps:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_x86_sse41_blend_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 -1)
  ret <4 x float> %1
}

; Same all-ones-mask fold for the i16 variant.
define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test2_x86_sse41_pblend_w:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_x86_sse41_pblend_w:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1)
  ret <8 x i16> %1
}

; Both blend operands are %a0, so the result is %a0 regardless of the mask
; and no instruction is emitted.
define <2 x double> @test3_x86_sse41_blend_pd(<2 x double> %a0) {
; CHECK-LABEL: test3_x86_sse41_blend_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a0, i32 7)
  ret <2 x double> %1
}

; Same identical-operands fold for the float variant.
define <4 x float> @test3_x86_sse41_blend_ps(<4 x float> %a0) {
; CHECK-LABEL: test3_x86_sse41_blend_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a0, i32 7)
  ret <4 x float> %1
}

; Same identical-operands fold for the i16 variant.
define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
; CHECK-LABEL: test3_x86_sse41_pblend_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a0, i32 7)
  ret <8 x i16> %1
}

; Only lane 0 of the blend result is extracted, and lane 0 of each
; zero-splatted input equals the corresponding original lane 0, so all three
; input splat shuffles fold away and a single blendv remains.
define double @demandedelts_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE-LABEL: demandedelts_blendvpd:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: demandedelts_blendvpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
  %2 = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
  %3 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer
  %4 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %1, <2 x double> %2, <2 x double> %3)
  %5 = extractelement <2 x double> %4, i32 0
  ret double %5
}

; Same demanded-elements fold for the float variant.
define float @demandedelts_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE-LABEL: demandedelts_blendvps:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: demandedelts_blendvps:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  %3 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> zeroinitializer
  %4 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %1, <4 x float> %2, <4 x float> %3)
  %5 = extractelement <4 x float> %4, i32 0
  ret <4 x float> %4
  ; NOTE(review): line above is reconstructed; the extracted scalar is
  ; returned, see %5 below.
}

; Byte variant: the result splat is still demanded (so a pxor/pshufb pair
; remains after the blend), but the three input splats fold away.
define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; SSE-LABEL: demandedelts_pblendvb:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    pshufb %xmm0, %xmm3
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: demandedelts_pblendvb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
  %2 = shufflevector <16 x i8> %a1, <16 x i8> undef, <16 x i32> zeroinitializer
  %3 = shufflevector <16 x i8> %a2, <16 x i8> undef, <16 x i32> zeroinitializer
  %4 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %1, <16 x i8> %2, <16 x i8> %3)
  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %5
}

; Demanded-bits test: both blend inputs are built from the same scalar with
; different low bits OR'd in, and the blended result is shifted right by 11;
; the checks pin that the blendvpd itself survives simplification.
define <2 x i64> @demandedbits_blendvpd(i64 %a0, i64 %a2, <2 x double> %a3) {
; SSE-LABEL: demandedbits_blendvpd:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    orq $1, %rax
; SSE-NEXT:    orq $4, %rdi
; SSE-NEXT:    movq %rax, %xmm1
; SSE-NEXT:    movq %rdi, %xmm2
; SSE-NEXT:    movq {{.*#+}} xmm1 = xmm1[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm2 = xmm2[0],zero
; SSE-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE-NEXT:    psrlq $11, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: demandedbits_blendvpd:
; AVX:       # %bb.0:
; AVX-NEXT:    movq %rdi, %rax
; AVX-NEXT:    orq $1, %rax
; AVX-NEXT:    orq $4, %rdi
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovq %rdi, %xmm2
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = xmm1[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm2 = xmm2[0],zero
; AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT:    vpsrlq $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = or i64 %a0, 1
  %2 = or i64 %a0, 4
  %3 = bitcast i64 %1 to double
  %4 = bitcast i64 %2 to double
  %5 = insertelement <2 x double> zeroinitializer, double %3, i32 0
  %6 = insertelement <2 x double> zeroinitializer, double %4, i32 0
  %7 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %5, <2 x double> %6, <2 x double> %a3)
  %8 = bitcast <2 x double> %7 to <2 x i64>
  %9 = lshr <2 x i64> %8, <i64 11, i64 11>
  ret <2 x i64> %9
}

; Inverting the selection mask with xor -1 folds into the blend by swapping
; its two data operands (no xor/pxor appears in the checked output).
define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; SSE-LABEL: xor_pblendvb:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_pblendvb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1)
  ret <16 x i8> %2
}

; Same inverted-mask fold when the NOT is done in the integer domain through
; bitcasts around a float blendvps.
define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE-LABEL: xor_blendvps:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    blendvps %xmm0, %xmm3, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_blendvps:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = bitcast <4 x float> %a2 to <4 x i32>
  %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %3)
  ret <4 x float> %4
}

; Same inverted-mask fold for the double variant (note the NOT is done as
; <4 x i32>, a different element width than the blend).
define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE-LABEL: xor_blendvpd:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_blendvpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = bitcast <2 x double> %a2 to <4 x i32>
  %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
  %3 = bitcast <4 x i32> %2 to <2 x double>
  %4 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %3)
  ret <2 x double> %4
}

; PR47404: 'icmp sgt x, -1' (x is non-negative) tests only the sign bit, so
; the select lowers to a single pblendvb on %2 with no separate compare.
define <16 x i8> @PR47404(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
; SSE-LABEL: PR47404:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: PR47404:
; AVX:       # %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %4 = icmp sgt <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %5 = select <16 x i1> %4, <16 x i8> %0, <16 x i8> %1
  ret <16 x i8> %5
}

; Intrinsic declarations for the constant-immediate blends.
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32)
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32)

; Intrinsic declarations for the variable-mask blends.
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)