; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL

; Codegen checks for the llvm.x86.sse41.* intrinsics. Every function below
; calls exactly one intrinsic and returns its result; the assertions pin the
; llc output for the two invocations at the top of this file (the
; -mattr=+sse4.1 one and the -mcpu=knl one, both for i386-apple-darwin).
; The assertion lines are generated; regenerate them with the script named in
; the NOTE line instead of editing them by hand.

; blendvpd with three <2 x double> operands. The sse4.1 expectations contain
; register copies around the two-operand blendvpd (xmm2 is moved into xmm0
; first, the result is moved back into xmm0 afterwards); the knl expectations
; are a single four-operand vblendvpd.
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE41-LABEL: test_x86_sse41_blendvpd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movapd %xmm0, %xmm3
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm1, %xmm3
; SSE41-NEXT:    movapd %xmm3, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_blendvpd:
; KNL:       ## BB#0:
; KNL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


; blendvps with three <4 x float> operands. Same expected shape as the
; blendvpd test: copies around blendvps for sse4.1, one vblendvps for knl.
define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE41-LABEL: test_x86_sse41_blendvps:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps %xmm0, %xmm3
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    blendvps %xmm1, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_blendvps:
; KNL:       ## BB#0:
; KNL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


; dppd on two <2 x double> operands. The i8 7 argument of the call shows up
; as the $7 immediate of the expected dppd / vdppd.
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; SSE41-LABEL: test_x86_sse41_dppd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    dppd $7, %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_dppd:
; KNL:       ## BB#0:
; KNL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone


; dpps on two <4 x float> operands, again with i8 7 expected as $7.
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; SSE41-LABEL: test_x86_sse41_dpps:
; SSE41:       ## BB#0:
; SSE41-NEXT:    dpps $7, %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_dpps:
; KNL:       ## BB#0:
; KNL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone


; insertps with the immediate i8 17. The instruction operands are matched
; loosely with a {{.*#+}} regex; what is pinned is the shuffle comment
; "xmm0 = zero,xmm1[0],xmm0[2,3]".
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; SSE41-LABEL: test_x86_sse41_insertps:
; SSE41:       ## BB#0:
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3]
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_insertps:
; KNL:       ## BB#0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3]
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone



; mpsadbw takes two <16 x i8> operands plus i8 7 and returns <8 x i16>;
; the immediate is expected as $7. The matching declaration follows the
; function body.
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE41-LABEL: test_x86_sse41_mpsadbw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    mpsadbw $7, %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_mpsadbw:
; KNL:       ## BB#0:
; KNL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
; Declaration of the intrinsic called by @test_x86_sse41_mpsadbw.
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone


; packusdw takes two <4 x i32> operands and returns <8 x i16>. Expected as
; a two-operand packusdw for sse4.1 and a three-operand vpackusdw for knl.
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_packusdw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_packusdw:
; KNL:       ## BB#0:
; KNL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


; pblendvb with three <16 x i8> operands. The sse4.1 expectations contain
; register copies around the two-operand pblendvb (xmm2 is moved into xmm0
; first, the result is moved back afterwards); the knl expectations are a
; single four-operand vpblendvb.
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; SSE41-LABEL: test_x86_sse41_pblendvb:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movdqa %xmm0, %xmm3
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    pblendvb %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pblendvb:
; KNL:       ## BB#0:
; KNL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


; phminposuw is the only single-operand intrinsic in this group: one
; <8 x i16> in, one <8 x i16> out, expected as (v)phminposuw %xmm0, %xmm0.
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; SSE41-LABEL: test_x86_sse41_phminposuw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    phminposuw %xmm0, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_phminposuw:
; KNL:       ## BB#0:
; KNL-NEXT:    vphminposuw %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


; pmaxsb on two <16 x i8> operands; expected as pmaxsb / vpmaxsb.
define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE41-LABEL: test_x86_sse41_pmaxsb:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pmaxsb:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


; pmaxsd on two <4 x i32> operands; expected as pmaxsd / vpmaxsd.
define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pmaxsd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pmaxsd:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


; pmaxud on two <4 x i32> operands; expected as pmaxud / vpmaxud.
define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pmaxud:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmaxud %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pmaxud:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


; pmaxuw on two <8 x i16> operands; expected as pmaxuw / vpmaxuw. The
; matching declaration follows the function body.
define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE41-LABEL: test_x86_sse41_pmaxuw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pmaxuw:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
; Declaration of the intrinsic called by @test_x86_sse41_pmaxuw.
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


; pminsb on two <16 x i8> operands; expected as pminsb / vpminsb.
define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE41-LABEL: test_x86_sse41_pminsb:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pminsb %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pminsb:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


; pminsd on two <4 x i32> operands; expected as pminsd / vpminsd.
define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pminsd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pminsd %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pminsd:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


; pminud on two <4 x i32> operands; expected as pminud / vpminud.
define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pminud:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pminud %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pminud:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


; pminuw on two <8 x i16> operands; expected as pminuw / vpminuw.
define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE41-LABEL: test_x86_sse41_pminuw:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pminuw %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pminuw:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


; pmuldq takes two <4 x i32> operands and returns <2 x i64>; expected as
; pmuldq / vpmuldq.
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pmuldq:
; SSE41:       ## BB#0:
; SSE41-NEXT:    pmuldq %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_pmuldq:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


; ptestc returns an i32 instead of a vector. The expected sequence is
; (v)ptest followed by "sbbl %eax, %eax" and "andl $1, %eax".
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE41-LABEL: test_x86_sse41_ptestc:
; SSE41:       ## BB#0:
; SSE41-NEXT:    ptest %xmm1, %xmm0
; SSE41-NEXT:    sbbl %eax, %eax
; SSE41-NEXT:    andl $1, %eax
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_ptestc:
; KNL:       ## BB#0:
; KNL-NEXT:    vptest %xmm1, %xmm0
; KNL-NEXT:    sbbl %eax, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


; ptestnzc returns an i32. The expected sequence zeroes eax with xorl before
; the (v)ptest and then sets al with seta. The matching declaration follows
; the function body.
define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE41-LABEL: test_x86_sse41_ptestnzc:
; SSE41:       ## BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm1, %xmm0
; SSE41-NEXT:    seta %al
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_ptestnzc:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vptest %xmm1, %xmm0
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
; Declaration of the intrinsic called by @test_x86_sse41_ptestnzc.
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


; ptestz returns an i32. The expected sequence zeroes eax with xorl before
; the (v)ptest and then sets al with sete.
define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; SSE41-LABEL: test_x86_sse41_ptestz:
; SSE41:       ## BB#0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    ptest %xmm1, %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_ptestz:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vptest %xmm1, %xmm0
; KNL-NEXT:    sete %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


; round.pd on one <2 x double> operand. The i32 7 argument of the call shows
; up as the $7 immediate of the expected roundpd / vroundpd.
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; SSE41-LABEL: test_x86_sse41_round_pd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $7, %xmm0, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_round_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vroundpd $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


; round.ps on one <4 x float> operand, with i32 7 expected as $7.
define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; SSE41-LABEL: test_x86_sse41_round_ps:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $7, %xmm0, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_round_ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vroundps $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


; round.sd takes two <2 x double> operands plus i32 7. Expected as a
; two-register roundsd for sse4.1 and a three-register vroundsd for knl.
define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE41-LABEL: test_x86_sse41_round_sd:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundsd $7, %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_round_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


; round.ss takes two <4 x float> operands plus i32 7. Expected as a
; two-register roundss for sse4.1 and a three-register vroundss for knl.
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE41-LABEL: test_x86_sse41_round_ss:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundss $7, %xmm1, %xmm0
; SSE41-NEXT:    retl
;
; KNL-LABEL: test_x86_sse41_round_ss:
; KNL:       ## BB#0:
; KNL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone