; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s

; Every test below calls a single x86 intrinsic and pins the exact 32-bit
; assembly that llc emits for it with AVX enabled: the v-prefixed instruction
; followed by retl. Each intrinsic is declared right before the test using it.

; --- AES-NI intrinsics ------------------------------------------------------

declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
; CHECK: # BB#0:
; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
; CHECK: # BB#0:
; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
; CHECK: # BB#0:
; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
; CHECK: # BB#0:
; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
; CHECK: # BB#0:
; CHECK-NEXT: vaesimc %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %result
}

; The i8 operand is expected to appear as the instruction's immediate ($7).
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
; CHECK: # BB#0:
; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %result
}

; --- SSE2 scalar add and compares --------------------------------------------

declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

; Predicate immediate 7 is expected to print as the "ord" mnemonic suffix.
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %result
}

; --- SSE2 comi* compares ------------------------------------------------------
; Each returns i32: one vcomisd, then the relevant flag is materialised in %eax.

declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}

declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}

declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}

declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}

; Unlike its siblings, the "less than" form is expected as sbbl + andl.
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}

declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}

; --- SSE2 conversions ---------------------------------------------------------

declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %result
}

declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %result
}

declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %result
}
; --- SSE2 conversions ---------------------------------------------------------
; Each intrinsic is declared right before the test that exercises it.

declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %result
}

declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %result
}

declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %result
}

declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtsd2si %xmm0, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %result
}

declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %result
}

; The constant integer operand is expected to be loaded into %eax first.
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK: # BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %result
}

declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %result
}

declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK: # BB#0:
; CHECK-NEXT: vcvttsd2si %xmm0, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %result
}

; --- SSE2 floating-point div / max / min / movmsk / mul ------------------------

declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vmovmskpd %xmm0, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %result
}

declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}

; --- SSE2 integer pack / add / average / multiply / min / max ------------------

declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %result
}

declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %result
}

declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %result
}

declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %result
}

declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK: # BB#0:
; CHECK-NEXT: vpmovmskb %xmm0, %eax
; CHECK-NEXT: retl
  %result = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %result
}

declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK: # BB#0:
; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK: # BB#0:
; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %result
}

; --- SSE2 shifts ---------------------------------------------------------------
; Register-count forms take the count in %xmm1; the "i" forms pass i32 7 and
; are expected to use the immediate encoding ($7).

declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}

declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %result
}

declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %result
}

declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %result
}

declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}

declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %result
}

declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %result
}

declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}

declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %result
}

declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %result
}

declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %result
}

; --- SSE2 integer subtract and packed sqrt -------------------------------------

declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK: # BB#0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK: # BB#0:
; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
; CHECK-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %result
}

; Scalar square root: the single source register is expected to appear as
; both source operands of the three-operand vsqrtsd form.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; Store tests: the destination pointer is loaded from the stack into %eax
; (the stack offset is matched with a regex), then the vector is stored
; through it.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovq %xmm0, (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
  ; The constant-pool label is matched with a regex: its numeric prefix
  ; follows this function's position in the file, so a literal number would
  ; break whenever a test is added or removed above this one.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpaddb LCPI{{[0-9]+}}_0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%eax)
; CHECK-NEXT: retl
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
  ; The <0.0, 0x4200000000000000> addend is expected to be built in %xmm1
  ; by a scalar load followed by a byte shift, not loaded as a full vector.
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovupd %xmm0, (%eax)
; CHECK-NEXT: retl
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomi* compares return i32: one vucomisd, then the relevant flag is
; materialised in %eax with setcc + movzbl.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK: # BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind
readnone 934 935 936define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { 937; CHECK-LABEL: test_x86_sse2_ucomigt_sd: 938; CHECK: # BB#0: 939; CHECK-NEXT: vucomisd %xmm1, %xmm0 940; CHECK-NEXT: seta %al 941; CHECK-NEXT: movzbl %al, %eax 942; CHECK-NEXT: retl 943 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 944 ret i32 %res 945} 946declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone 947 948 949define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { 950; CHECK-LABEL: test_x86_sse2_ucomile_sd: 951; CHECK: # BB#0: 952; CHECK-NEXT: vucomisd %xmm1, %xmm0 953; CHECK-NEXT: setbe %al 954; CHECK-NEXT: movzbl %al, %eax 955; CHECK-NEXT: retl 956 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 957 ret i32 %res 958} 959declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone 960 961 962define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { 963; CHECK-LABEL: test_x86_sse2_ucomilt_sd: 964; CHECK: # BB#0: 965; CHECK-NEXT: vucomisd %xmm1, %xmm0 966; CHECK-NEXT: sbbl %eax, %eax 967; CHECK-NEXT: andl $1, %eax 968; CHECK-NEXT: retl 969 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 970 ret i32 %res 971} 972declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone 973 974 975define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { 976; CHECK-LABEL: test_x86_sse2_ucomineq_sd: 977; CHECK: # BB#0: 978; CHECK-NEXT: vucomisd %xmm1, %xmm0 979; CHECK-NEXT: setne %al 980; CHECK-NEXT: movzbl %al, %eax 981; CHECK-NEXT: retl 982 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 983 ret i32 %res 984} 985declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone 986 987 988define <2 x double> @test_x86_sse3_addsub_pd(<2 x 
double> %a0, <2 x double> %a1) { 989; CHECK-LABEL: test_x86_sse3_addsub_pd: 990; CHECK: # BB#0: 991; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 992; CHECK-NEXT: retl 993 %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 994 ret <2 x double> %res 995} 996declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone 997 998 999define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { 1000; CHECK-LABEL: test_x86_sse3_addsub_ps: 1001; CHECK: # BB#0: 1002; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 1003; CHECK-NEXT: retl 1004 %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1005 ret <4 x float> %res 1006} 1007declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone 1008 1009 1010define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { 1011; CHECK-LABEL: test_x86_sse3_hadd_pd: 1012; CHECK: # BB#0: 1013; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 1014; CHECK-NEXT: retl 1015 %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 1016 ret <2 x double> %res 1017} 1018declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 1019 1020 1021define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { 1022; CHECK-LABEL: test_x86_sse3_hadd_ps: 1023; CHECK: # BB#0: 1024; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0 1025; CHECK-NEXT: retl 1026 %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1027 ret <4 x float> %res 1028} 1029declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 1030 1031 1032define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { 1033; CHECK-LABEL: test_x86_sse3_hsub_pd: 1034; CHECK: # BB#0: 1035; CHECK-NEXT: vhsubpd %xmm1, 
%xmm0, %xmm0 1036; CHECK-NEXT: retl 1037 %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 1038 ret <2 x double> %res 1039} 1040declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 1041 1042 1043define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { 1044; CHECK-LABEL: test_x86_sse3_hsub_ps: 1045; CHECK: # BB#0: 1046; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0 1047; CHECK-NEXT: retl 1048 %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1049 ret <4 x float> %res 1050} 1051declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 1052 1053 1054define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { 1055; CHECK-LABEL: test_x86_sse3_ldu_dq: 1056; CHECK: # BB#0: 1057; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1058; CHECK-NEXT: vlddqu (%eax), %xmm0 1059; CHECK-NEXT: retl 1060 %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] 1061 ret <16 x i8> %res 1062} 1063declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly 1064 1065 1066define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 1067; CHECK-LABEL: test_x86_sse41_blendvpd: 1068; CHECK: # BB#0: 1069; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1070; CHECK-NEXT: retl 1071 %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] 1072 ret <2 x double> %res 1073} 1074declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 1075 1076 1077define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 1078; CHECK-LABEL: test_x86_sse41_blendvps: 1079; CHECK: # BB#0: 1080; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1081; CHECK-NEXT: retl 1082 %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 
x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] 1083 ret <4 x float> %res 1084} 1085declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 1086 1087 1088define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { 1089; CHECK-LABEL: test_x86_sse41_dppd: 1090; CHECK: # BB#0: 1091; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 1092; CHECK-NEXT: retl 1093 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] 1094 ret <2 x double> %res 1095} 1096declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 1097 1098 1099define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { 1100; CHECK-LABEL: test_x86_sse41_dpps: 1101; CHECK: # BB#0: 1102; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 1103; CHECK-NEXT: retl 1104 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1105 ret <4 x float> %res 1106} 1107declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 1108 1109 1110define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { 1111; CHECK-LABEL: test_x86_sse41_insertps: 1112; CHECK: # BB#0: 1113; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3] 1114; CHECK-NEXT: retl 1115 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1116 ret <4 x float> %res 1117} 1118declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 1119 1120 1121 1122define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { 1123; CHECK-LABEL: test_x86_sse41_mpsadbw: 1124; CHECK: # BB#0: 1125; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 1126; CHECK-NEXT: retl 1127 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] 1128 ret <8 
x i16> %res 1129} 1130declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone 1131 1132 1133define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { 1134; CHECK-LABEL: test_x86_sse41_packusdw: 1135; CHECK: # BB#0: 1136; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1137; CHECK-NEXT: retl 1138 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] 1139 ret <8 x i16> %res 1140} 1141declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 1142 1143 1144define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { 1145; CHECK-LABEL: test_x86_sse41_pblendvb: 1146; CHECK: # BB#0: 1147; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 1148; CHECK-NEXT: retl 1149 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] 1150 ret <16 x i8> %res 1151} 1152declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1153 1154 1155define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { 1156; CHECK-LABEL: test_x86_sse41_phminposuw: 1157; CHECK: # BB#0: 1158; CHECK-NEXT: vphminposuw %xmm0, %xmm0 1159; CHECK-NEXT: retl 1160 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 1161 ret <8 x i16> %res 1162} 1163declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 1164 1165 1166define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { 1167; CHECK-LABEL: test_x86_sse41_pmaxsb: 1168; CHECK: # BB#0: 1169; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1170; CHECK-NEXT: retl 1171 %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1172 ret <16 x i8> %res 1173} 1174declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone 1175 1176 1177define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { 1178; 
CHECK-LABEL: test_x86_sse41_pmaxsd: 1179; CHECK: # BB#0: 1180; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1181; CHECK-NEXT: retl 1182 %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1183 ret <4 x i32> %res 1184} 1185declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone 1186 1187 1188define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { 1189; CHECK-LABEL: test_x86_sse41_pmaxud: 1190; CHECK: # BB#0: 1191; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1192; CHECK-NEXT: retl 1193 %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1194 ret <4 x i32> %res 1195} 1196declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone 1197 1198 1199define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { 1200; CHECK-LABEL: test_x86_sse41_pmaxuw: 1201; CHECK: # BB#0: 1202; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1203; CHECK-NEXT: retl 1204 %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1205 ret <8 x i16> %res 1206} 1207declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone 1208 1209 1210define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { 1211; CHECK-LABEL: test_x86_sse41_pminsb: 1212; CHECK: # BB#0: 1213; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1214; CHECK-NEXT: retl 1215 %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1216 ret <16 x i8> %res 1217} 1218declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone 1219 1220 1221define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { 1222; CHECK-LABEL: test_x86_sse41_pminsd: 1223; CHECK: # BB#0: 1224; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1225; CHECK-NEXT: retl 1226 %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1227 ret <4 x i32> 
%res 1228} 1229declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone 1230 1231 1232define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { 1233; CHECK-LABEL: test_x86_sse41_pminud: 1234; CHECK: # BB#0: 1235; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0 1236; CHECK-NEXT: retl 1237 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1238 ret <4 x i32> %res 1239} 1240declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone 1241 1242 1243define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 1244; CHECK-LABEL: test_x86_sse41_pminuw: 1245; CHECK: # BB#0: 1246; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1247; CHECK-NEXT: retl 1248 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1249 ret <8 x i16> %res 1250} 1251declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone 1252 1253 1254define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { 1255; CHECK-LABEL: test_x86_sse41_pmovsxbd: 1256; CHECK: # BB#0: 1257; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0 1258; CHECK-NEXT: retl 1259 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1260 ret <4 x i32> %res 1261} 1262declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone 1263 1264 1265define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { 1266; CHECK-LABEL: test_x86_sse41_pmovsxbq: 1267; CHECK: # BB#0: 1268; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0 1269; CHECK-NEXT: retl 1270 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1271 ret <2 x i64> %res 1272} 1273declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone 1274 1275 1276define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { 1277; CHECK-LABEL: test_x86_sse41_pmovsxbw: 1278; CHECK: # BB#0: 1279; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 1280; CHECK-NEXT: retl 1281 %res = call <8 x i16> 
@llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 1282 ret <8 x i16> %res 1283} 1284declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone 1285 1286 1287define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { 1288; CHECK-LABEL: test_x86_sse41_pmovsxdq: 1289; CHECK: # BB#0: 1290; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 1291; CHECK-NEXT: retl 1292 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1293 ret <2 x i64> %res 1294} 1295declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone 1296 1297 1298define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { 1299; CHECK-LABEL: test_x86_sse41_pmovsxwd: 1300; CHECK: # BB#0: 1301; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 1302; CHECK-NEXT: retl 1303 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1304 ret <4 x i32> %res 1305} 1306declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone 1307 1308 1309define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { 1310; CHECK-LABEL: test_x86_sse41_pmovsxwq: 1311; CHECK: # BB#0: 1312; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0 1313; CHECK-NEXT: retl 1314 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1315 ret <2 x i64> %res 1316} 1317declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone 1318 1319 1320define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { 1321; CHECK-LABEL: test_x86_sse41_pmovzxbd: 1322; CHECK: # BB#0: 1323; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1324; CHECK-NEXT: retl 1325 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1326 ret <4 x i32> %res 1327} 1328declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone 1329 1330 1331define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { 1332; CHECK-LABEL: test_x86_sse41_pmovzxbq: 1333; CHECK: # BB#0: 
1334; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1335; CHECK-NEXT: retl 1336 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1337 ret <2 x i64> %res 1338} 1339declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 1340 1341 1342define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { 1343; CHECK-LABEL: test_x86_sse41_pmovzxbw: 1344; CHECK: # BB#0: 1345; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1346; CHECK-NEXT: retl 1347 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 1348 ret <8 x i16> %res 1349} 1350declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone 1351 1352 1353define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { 1354; CHECK-LABEL: test_x86_sse41_pmovzxdq: 1355; CHECK: # BB#0: 1356; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1357; CHECK-NEXT: retl 1358 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 1359 ret <2 x i64> %res 1360} 1361declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone 1362 1363 1364define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { 1365; CHECK-LABEL: test_x86_sse41_pmovzxwd: 1366; CHECK: # BB#0: 1367; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1368; CHECK-NEXT: retl 1369 %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 1370 ret <4 x i32> %res 1371} 1372declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone 1373 1374 1375define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { 1376; CHECK-LABEL: test_x86_sse41_pmovzxwq: 1377; CHECK: # BB#0: 1378; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1379; CHECK-NEXT: retl 1380 %res 
= call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 1381 ret <2 x i64> %res 1382} 1383declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone 1384 1385 1386define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { 1387; CHECK-LABEL: test_x86_sse41_pmuldq: 1388; CHECK: # BB#0: 1389; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 1390; CHECK-NEXT: retl 1391 %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] 1392 ret <2 x i64> %res 1393} 1394declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone 1395 1396 1397define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { 1398; CHECK-LABEL: test_x86_sse41_ptestc: 1399; CHECK: # BB#0: 1400; CHECK-NEXT: vptest %xmm1, %xmm0 1401; CHECK-NEXT: sbbl %eax, %eax 1402; CHECK-NEXT: andl $1, %eax 1403; CHECK-NEXT: retl 1404 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1405 ret i32 %res 1406} 1407declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 1408 1409 1410define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { 1411; CHECK-LABEL: test_x86_sse41_ptestnzc: 1412; CHECK: # BB#0: 1413; CHECK-NEXT: vptest %xmm1, %xmm0 1414; CHECK-NEXT: seta %al 1415; CHECK-NEXT: movzbl %al, %eax 1416; CHECK-NEXT: retl 1417 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1418 ret i32 %res 1419} 1420declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 1421 1422 1423define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { 1424; CHECK-LABEL: test_x86_sse41_ptestz: 1425; CHECK: # BB#0: 1426; CHECK-NEXT: vptest %xmm1, %xmm0 1427; CHECK-NEXT: sete %al 1428; CHECK-NEXT: movzbl %al, %eax 1429; CHECK-NEXT: retl 1430 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 1431 ret i32 %res 1432} 1433declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x 
i64>) nounwind readnone 1434 1435 1436define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { 1437; CHECK-LABEL: test_x86_sse41_round_pd: 1438; CHECK: # BB#0: 1439; CHECK-NEXT: vroundpd $7, %xmm0, %xmm0 1440; CHECK-NEXT: retl 1441 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] 1442 ret <2 x double> %res 1443} 1444declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1445 1446 1447define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { 1448; CHECK-LABEL: test_x86_sse41_round_ps: 1449; CHECK: # BB#0: 1450; CHECK-NEXT: vroundps $7, %xmm0, %xmm0 1451; CHECK-NEXT: retl 1452 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 1453 ret <4 x float> %res 1454} 1455declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1456 1457 1458define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { 1459; CHECK-LABEL: test_x86_sse41_round_sd: 1460; CHECK: # BB#0: 1461; CHECK-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 1462; CHECK-NEXT: retl 1463 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] 1464 ret <2 x double> %res 1465} 1466declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 1467 1468 1469define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { 1470; CHECK-LABEL: test_x86_sse41_round_ss: 1471; CHECK: # BB#0: 1472; CHECK-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 1473; CHECK-NEXT: retl 1474 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] 1475 ret <4 x float> %res 1476} 1477declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 1478 1479 1480define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { 1481; CHECK-LABEL: 
test_x86_sse42_pcmpestri128: 1482; CHECK: # BB#0: 1483; CHECK-NEXT: movl $7, %eax 1484; CHECK-NEXT: movl $7, %edx 1485; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1486; CHECK-NEXT: movl %ecx, %eax 1487; CHECK-NEXT: retl 1488 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1489 ret i32 %res 1490} 1491declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1492 1493 1494define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { 1495; CHECK-LABEL: test_x86_sse42_pcmpestri128_load: 1496; CHECK: # BB#0: 1497; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1498; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1499; CHECK-NEXT: vmovdqa (%eax), %xmm0 1500; CHECK-NEXT: movl $7, %eax 1501; CHECK-NEXT: movl $7, %edx 1502; CHECK-NEXT: vpcmpestri $7, (%ecx), %xmm0 1503; CHECK-NEXT: movl %ecx, %eax 1504; CHECK-NEXT: retl 1505 %1 = load <16 x i8>, <16 x i8>* %a0 1506 %2 = load <16 x i8>, <16 x i8>* %a2 1507 %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1] 1508 ret i32 %res 1509} 1510 1511 1512define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { 1513; CHECK-LABEL: test_x86_sse42_pcmpestria128: 1514; CHECK: # BB#0: 1515; CHECK-NEXT: movl $7, %eax 1516; CHECK-NEXT: movl $7, %edx 1517; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1518; CHECK-NEXT: seta %al 1519; CHECK-NEXT: movzbl %al, %eax 1520; CHECK-NEXT: retl 1521 %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1522 ret i32 %res 1523} 1524declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1525 1526 1527define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { 1528; CHECK-LABEL: test_x86_sse42_pcmpestric128: 1529; CHECK: # BB#0: 1530; CHECK-NEXT: movl $7, %eax 1531; CHECK-NEXT: movl $7, %edx 1532; CHECK-NEXT: vpcmpestri $7, 
%xmm1, %xmm0 1533; CHECK-NEXT: sbbl %eax, %eax 1534; CHECK-NEXT: andl $1, %eax 1535; CHECK-NEXT: retl 1536 %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1537 ret i32 %res 1538} 1539declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1540 1541 1542define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { 1543; CHECK-LABEL: test_x86_sse42_pcmpestrio128: 1544; CHECK: # BB#0: 1545; CHECK-NEXT: movl $7, %eax 1546; CHECK-NEXT: movl $7, %edx 1547; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1548; CHECK-NEXT: seto %al 1549; CHECK-NEXT: movzbl %al, %eax 1550; CHECK-NEXT: retl 1551 %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1552 ret i32 %res 1553} 1554declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1555 1556 1557define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { 1558; CHECK-LABEL: test_x86_sse42_pcmpestris128: 1559; CHECK: # BB#0: 1560; CHECK-NEXT: movl $7, %eax 1561; CHECK-NEXT: movl $7, %edx 1562; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1563; CHECK-NEXT: sets %al 1564; CHECK-NEXT: movzbl %al, %eax 1565; CHECK-NEXT: retl 1566 %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] 1567 ret i32 %res 1568} 1569declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1570 1571 1572define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { 1573; CHECK-LABEL: test_x86_sse42_pcmpestriz128: 1574; CHECK: # BB#0: 1575; CHECK-NEXT: movl $7, %eax 1576; CHECK-NEXT: movl $7, %edx 1577; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 1578; CHECK-NEXT: sete %al 1579; CHECK-NEXT: movzbl %al, %eax 1580; CHECK-NEXT: retl 1581 %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> 
[#uses=1] 1582 ret i32 %res 1583} 1584declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1585 1586 1587define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { 1588; CHECK-LABEL: test_x86_sse42_pcmpestrm128: 1589; CHECK: # BB#0: 1590; CHECK-NEXT: movl $7, %eax 1591; CHECK-NEXT: movl $7, %edx 1592; CHECK-NEXT: vpcmpestrm $7, %xmm1, %xmm0 1593; CHECK-NEXT: retl 1594 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1595 ret <16 x i8> %res 1596} 1597declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 1598 1599 1600define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) { 1601; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load: 1602; CHECK: # BB#0: 1603; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1604; CHECK-NEXT: movl $7, %eax 1605; CHECK-NEXT: movl $7, %edx 1606; CHECK-NEXT: vpcmpestrm $7, (%ecx), %xmm0 1607; CHECK-NEXT: retl 1608 %1 = load <16 x i8>, <16 x i8>* %a2 1609 %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1] 1610 ret <16 x i8> %res 1611} 1612 1613 1614define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { 1615; CHECK-LABEL: test_x86_sse42_pcmpistri128: 1616; CHECK: # BB#0: 1617; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1618; CHECK-NEXT: movl %ecx, %eax 1619; CHECK-NEXT: retl 1620 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1621 ret i32 %res 1622} 1623declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1624 1625 1626define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { 1627; CHECK-LABEL: test_x86_sse42_pcmpistri128_load: 1628; CHECK: # BB#0: 1629; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1630; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1631; CHECK-NEXT: 
vmovdqa (%ecx), %xmm0 1632; CHECK-NEXT: vpcmpistri $7, (%eax), %xmm0 1633; CHECK-NEXT: movl %ecx, %eax 1634; CHECK-NEXT: retl 1635 %1 = load <16 x i8>, <16 x i8>* %a0 1636 %2 = load <16 x i8>, <16 x i8>* %a1 1637 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1] 1638 ret i32 %res 1639} 1640 1641 1642define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { 1643; CHECK-LABEL: test_x86_sse42_pcmpistria128: 1644; CHECK: # BB#0: 1645; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1646; CHECK-NEXT: seta %al 1647; CHECK-NEXT: movzbl %al, %eax 1648; CHECK-NEXT: retl 1649 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1650 ret i32 %res 1651} 1652declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1653 1654 1655define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { 1656; CHECK-LABEL: test_x86_sse42_pcmpistric128: 1657; CHECK: # BB#0: 1658; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1659; CHECK-NEXT: sbbl %eax, %eax 1660; CHECK-NEXT: andl $1, %eax 1661; CHECK-NEXT: retl 1662 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1663 ret i32 %res 1664} 1665declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1666 1667 1668define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { 1669; CHECK-LABEL: test_x86_sse42_pcmpistrio128: 1670; CHECK: # BB#0: 1671; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1672; CHECK-NEXT: seto %al 1673; CHECK-NEXT: movzbl %al, %eax 1674; CHECK-NEXT: retl 1675 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1676 ret i32 %res 1677} 1678declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1679 1680 1681define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { 1682; CHECK-LABEL: test_x86_sse42_pcmpistris128: 
1683; CHECK: # BB#0: 1684; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1685; CHECK-NEXT: sets %al 1686; CHECK-NEXT: movzbl %al, %eax 1687; CHECK-NEXT: retl 1688 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1689 ret i32 %res 1690} 1691declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1692 1693 1694define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { 1695; CHECK-LABEL: test_x86_sse42_pcmpistriz128: 1696; CHECK: # BB#0: 1697; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1698; CHECK-NEXT: sete %al 1699; CHECK-NEXT: movzbl %al, %eax 1700; CHECK-NEXT: retl 1701 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1702 ret i32 %res 1703} 1704declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1705 1706 1707define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { 1708; CHECK-LABEL: test_x86_sse42_pcmpistrm128: 1709; CHECK: # BB#0: 1710; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0 1711; CHECK-NEXT: retl 1712 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] 1713 ret <16 x i8> %res 1714} 1715declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1716 1717 1718define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { 1719; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load: 1720; CHECK: # BB#0: 1721; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1722; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0 1723; CHECK-NEXT: retl 1724 %1 = load <16 x i8>, <16 x i8>* %a1 1725 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] 1726 ret <16 x i8> %res 1727} 1728 1729 1730define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { 1731; CHECK-LABEL: test_x86_sse_add_ss: 1732; CHECK: # BB#0: 1733; CHECK-NEXT: vaddss 
%xmm1, %xmm0, %xmm0 1734; CHECK-NEXT: retl 1735 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1736 ret <4 x float> %res 1737} 1738declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 1739 1740 1741define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { 1742; CHECK-LABEL: test_x86_sse_cmp_ps: 1743; CHECK: # BB#0: 1744; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 1745; CHECK-NEXT: retl 1746 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1747 ret <4 x float> %res 1748} 1749declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 1750 1751 1752define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { 1753; CHECK-LABEL: test_x86_sse_cmp_ss: 1754; CHECK: # BB#0: 1755; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 1756; CHECK-NEXT: retl 1757 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1758 ret <4 x float> %res 1759} 1760declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 1761 1762 1763define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { 1764; CHECK-LABEL: test_x86_sse_comieq_ss: 1765; CHECK: # BB#0: 1766; CHECK-NEXT: vcomiss %xmm1, %xmm0 1767; CHECK-NEXT: sete %al 1768; CHECK-NEXT: movzbl %al, %eax 1769; CHECK-NEXT: retl 1770 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1771 ret i32 %res 1772} 1773declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 1774 1775 1776define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { 1777; CHECK-LABEL: test_x86_sse_comige_ss: 1778; CHECK: # BB#0: 1779; CHECK-NEXT: vcomiss %xmm1, %xmm0 1780; CHECK-NEXT: setae %al 1781; CHECK-NEXT: movzbl %al, %eax 1782; CHECK-NEXT: retl 1783 %res = call i32 @llvm.x86.sse.comige.ss(<4 
x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1784 ret i32 %res 1785} 1786declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone 1787 1788 1789define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { 1790; CHECK-LABEL: test_x86_sse_comigt_ss: 1791; CHECK: # BB#0: 1792; CHECK-NEXT: vcomiss %xmm1, %xmm0 1793; CHECK-NEXT: seta %al 1794; CHECK-NEXT: movzbl %al, %eax 1795; CHECK-NEXT: retl 1796 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1797 ret i32 %res 1798} 1799declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone 1800 1801 1802define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { 1803; CHECK-LABEL: test_x86_sse_comile_ss: 1804; CHECK: # BB#0: 1805; CHECK-NEXT: vcomiss %xmm1, %xmm0 1806; CHECK-NEXT: setbe %al 1807; CHECK-NEXT: movzbl %al, %eax 1808; CHECK-NEXT: retl 1809 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1810 ret i32 %res 1811} 1812declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone 1813 1814 1815define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { 1816; CHECK-LABEL: test_x86_sse_comilt_ss: 1817; CHECK: # BB#0: 1818; CHECK-NEXT: vcomiss %xmm1, %xmm0 1819; CHECK-NEXT: sbbl %eax, %eax 1820; CHECK-NEXT: andl $1, %eax 1821; CHECK-NEXT: retl 1822 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1823 ret i32 %res 1824} 1825declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone 1826 1827 1828define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { 1829; CHECK-LABEL: test_x86_sse_comineq_ss: 1830; CHECK: # BB#0: 1831; CHECK-NEXT: vcomiss %xmm1, %xmm0 1832; CHECK-NEXT: setne %al 1833; CHECK-NEXT: movzbl %al, %eax 1834; CHECK-NEXT: retl 1835 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] 1836 ret i32 %res 1837} 
; Intrinsic exercised by test_x86_sse_comineq_ss.
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


; cvtsi2ss with the constant i32 7: the constant is moved into eax and
; converted with vcvtsi2ssl.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtsi2ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


; cvtss2si is expected to select vcvtss2si with the result in eax.
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtss2si %xmm0, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


; cvttss2si is expected to select vcvttss2si with the result in eax.
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvttss2si:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvttss2si %xmm0, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


; div.ss is expected to select the three-operand vdivss.
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_div_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


; ldmxcsr: the pointer argument is loaded from the stack and used as the
; memory operand of vldmxcsr.
define void @test_x86_sse_ldmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_ldmxcsr:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vldmxcsr (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind



; max.ps is expected to select vmaxps.
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


; max.ss is expected to select vmaxss.
define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


; min.ps is expected to select vminps.
define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


; min.ss is expected to select vminss.
define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vminss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


; movmsk.ps is expected to select vmovmskps with the mask in eax.
define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_movmsk_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovmskps %xmm0, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone



; mul.ss is expected to select vmulss.
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_mul_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


; rcp.ps is expected to select the two-operand vrcpps.
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vrcpps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


; rcp.ss is expected to select vrcpss with xmm0 as both sources.
define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


; rsqrt.ps is expected to select the two-operand vrsqrtps.
define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vrsqrtps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone


; rsqrt.ss is expected to select vrsqrtss with xmm0 as both sources.
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone


; sqrt.ps is expected to select the two-operand vsqrtps.
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


; sqrt.ss is expected to select vsqrtss with xmm0 as both sources.
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


; stmxcsr: the pointer argument is loaded from the stack and used as the
; memory operand of vstmxcsr.
define void @test_x86_sse_stmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_stmxcsr:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vstmxcsr (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind


; storeu.ps is expected to become a vmovups store through the pointer
; loaded from the stack.
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_storeu_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovups %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


; sub.ss is expected to select vsubss.
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_sub_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone


; ucomieq.ss: vucomiss followed by sete, zero-extended into eax.
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomieq_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomiss %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone


; ucomige.ss: vucomiss followed by setae, zero-extended into eax.
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomige_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomiss %xmm1, %xmm0
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone


; ucomigt.ss: vucomiss followed by seta, zero-extended into eax.
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomigt_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomiss %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone


; ucomile.ss: vucomiss followed by setbe, zero-extended into eax.
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomiss %xmm1, %xmm0
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone


; ucomilt.ss: vucomiss, with the result materialised by sbbl + andl $1
; rather than a setcc.
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomiss %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone


; ucomineq.ss: vucomiss followed by setne, zero-extended into eax.
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
; CHECK:       # BB#0:
; CHECK-NEXT:    vucomiss %xmm1, %xmm0
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %result = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone


; pabs.b.128 is expected to select vpabsb.
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpabsb %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone


; pabs.d.128 is expected to select vpabsd.
define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpabsd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone


; pabs.w.128 is expected to select vpabsw.
define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpabsw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone


; phadd.d.128 is expected to select vphaddd.
define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone


; phadd.sw.128 is expected to select vphaddsw.
define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


; phadd.w.128 is expected to select vphaddw.
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; phsub.d.128 is expected to select vphsubd.
define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone


; phsub.sw.128 is expected to select vphsubsw.
define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


; phsub.w.128 is expected to select vphsubw.
define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; pmadd.ub.sw.128 takes two <16 x i8> inputs, yields <8 x i16>, and is
; expected to select vpmaddubsw.
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone


; pmul.hr.sw.128 is expected to select vpmulhrsw.
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


; pshuf.b.128 is expected to select vpshufb.
define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone


; psign.b.128 is expected to select vpsignb.
define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_b_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone


; psign.d.128 is expected to select vpsignd.
define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_d_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone


; psign.w.128 is expected to select vpsignw.
define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_w_128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %result = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; addsub.pd.256 is expected to select vaddsubpd on ymm registers.
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %result
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


; addsub.ps.256 is expected to select vaddsubps on ymm registers.
define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %result
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


; blendv.pd.256: the third argument arrives in ymm2 and is printed as the
; first operand of vblendvpd.
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %result
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


; blendv.ps.256: same operand layout as the pd variant, selecting vblendvps.
define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %result
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


; cmp.pd.256 with predicate immediate 7 is expected to print as vcmpordpd.
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7)
  ret <4 x double> %result
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


; cmp.ps.256 with predicate immediate 7 is expected to print as vcmpordps.
; The intrinsic is declared after the pseudo-op test below.
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %result
}

; Walks every cmp.ps.256 predicate immediate from 0 to 31 and expects each one
; to be printed with its own mnemonic. Each compare consumes the previous
; compare's result as its second operand, which keeps all 32 instructions live
; and in order; %pN is the result of the compare with immediate N.
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %p0 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
  %p1 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p0, i8 1)
  %p2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p1, i8 2)
  %p3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p2, i8 3)
  %p4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p3, i8 4)
  %p5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p4, i8 5)
  %p6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p5, i8 6)
  %p7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p6, i8 7)
  %p8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p7, i8 8)
  %p9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p8, i8 9)
  %p10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p9, i8 10)
  %p11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p10, i8 11)
  %p12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p11, i8 12)
  %p13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p12, i8 13)
  %p14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p13, i8 14)
  %p15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p14, i8 15)
  %p16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p15, i8 16)
  %p17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p16, i8 17)
  %p18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p17, i8 18)
  %p19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p18, i8 19)
  %p20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p19, i8 20)
  %p21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p20, i8 21)
  %p22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p21, i8 22)
  %p23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p22, i8 23)
  %p24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p23, i8 24)
  %p25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p24, i8 25)
  %p26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p25, i8 26)
  %p27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p26, i8 27)
  %p28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p27, i8 28)
  %p29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p28, i8 29)
  %p30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p29, i8 30)
  %p31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %p30, i8 31)
  ret <8 x float> %p31
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


; cvt.pd2.ps.256 narrows ymm0 into xmm0 via vcvtpd2psy; a vzeroupper is
; expected before the return.
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtpd2psy %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %result = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


; cvt.pd2dq.256 narrows ymm0 into xmm0 via vcvtpd2dqy; a vzeroupper is
; expected before the return.
define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtpd2dqy %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %result = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


; cvt.ps2.pd.256 widens xmm0 into ymm0 via vcvtps2pd.
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT:    retl
  %result = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
  ret <4 x double> %result
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; 256-bit AVX conversion intrinsics.
; Each test calls a single intrinsic and pins the exact instruction sequence
; that llc is expected to emit for it.
; ---------------------------------------------------------------------------

define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone


; ymm source, xmm result: the expected output has a vzeroupper before the
; return.
define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; Dot product and horizontal add/subtract.
; The i8 7 passed to dp.ps.256 must show up as the $7 immediate of vdpps.
; ---------------------------------------------------------------------------

define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; Unaligned and masked loads/stores.
; In every test of this group the i8* argument is first fetched from the
; stack into %eax (the movl from (%esp)) and then used as the memory operand.
; ---------------------------------------------------------------------------

define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_ldu_dq_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vlddqu (%eax), %ymm0
; CHECK-NEXT:    retl
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_maskload_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly


define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_maskload_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly


define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_maskload_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly


define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_maskload_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly


define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind


; The 256-bit store variant additionally expects a vzeroupper before retl.
define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind


; ---------------------------------------------------------------------------
; Masked stores (single precision).
; The i8* destination is fetched from the stack into %eax first; the 256-bit
; variant expects a trailing vzeroupper.
; ---------------------------------------------------------------------------

define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind


define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind


; ---------------------------------------------------------------------------
; Packed max/min.
; ---------------------------------------------------------------------------

define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_max_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_max_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_min_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_min_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; Sign-mask extraction and vptest.
; All of these take ymm inputs and return an i32 in %eax, and all expect a
; vzeroupper before the return.
; ---------------------------------------------------------------------------

define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone


define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovmskps %ymm0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone


; ptestc materialises its result with sbbl/andl, ptestnzc with seta and
; ptestz with sete (each followed by a zero-extension into %eax).
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestc_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestnzc_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestz_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vptest %ymm1, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone


; ---------------------------------------------------------------------------
; Reciprocal, rounding and square-root intrinsics.
; The i32 7 passed to the round intrinsics must appear as the $7 immediate.
; ---------------------------------------------------------------------------

define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rcp_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vrcpps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_round_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vroundpd $7, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_round_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vroundps $7, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone


define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone


define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vsqrtps %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; Unaligned 256-bit stores.
; ---------------------------------------------------------------------------

define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups
  ; and it's hard to force with no 256-bit integer instructions.
  ; The add operation forces the execution domain. The expected code performs
  ; the 32 x i8 add as two 128-bit vpaddb halves (vextractf128/vinsertf128).
; CHECK-LABEL: test_x86_avx_storeu_dq_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovups %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind


define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; add operation forces the execution domain.
; CHECK-LABEL: test_x86_avx_storeu_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovupd %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind


define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_storeu_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovups %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind


; ---------------------------------------------------------------------------
; 128-bit broadcast from memory.
; ---------------------------------------------------------------------------

define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


; ---------------------------------------------------------------------------
; Immediate-controlled permutes.
; The pd, ps and si forms of vperm2f128 with immediate 7 are all expected to
; print the same decoded shuffle: ymm0 = ymm1[2,3],ymm0[0,1].
; ---------------------------------------------------------------------------

define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


; Immediate 1 is expected to print as the element swap xmm0[1,0].
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone


; ---------------------------------------------------------------------------
; Immediate-controlled vpermil: immediate 7 is expected to be printed as the
; decoded shuffle masks shown in the checks below.
; ---------------------------------------------------------------------------

define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone


; ---------------------------------------------------------------------------
; Variable (register/memory controlled) vpermil.
; ---------------------------------------------------------------------------

define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone


define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
; Same intrinsic, but the control vector comes from a load: the load is
; expected to be folded into the memory operand of vpermilps.
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpermilps (%eax), %xmm0, %xmm0
; CHECK-NEXT:    retl
  %a2 = load <4 x i32>, <4 x i32>* %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone


; ---------------------------------------------------------------------------
; vtestpd / vtestps.
; The "c" forms materialise the result with sbbl/andl, the "nzc" forms with
; seta and the "z" forms with sete. The 256-bit variants additionally expect
; a vzeroupper before the return.
; ---------------------------------------------------------------------------

define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestps %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestps %ymm1, %ymm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestps %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestps %ymm1, %ymm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestpd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestpd %ymm1, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestps %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vtestps %ymm1, %ymm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; vzeroall / vzeroupper.
; Both tests currently expect an additional vzeroupper after the instruction
; produced by the intrinsic itself.
; ---------------------------------------------------------------------------

define void @test_x86_avx_vzeroall() {
; CHECK-LABEL: test_x86_avx_vzeroall:
; CHECK:       # BB#0:
; CHECK-NEXT:    vzeroall
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


define void @test_x86_avx_vzeroupper() {
; CHECK-LABEL: test_x86_avx_vzeroupper:
; CHECK:       # BB#0:
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind

; Make sure that instructions which have no AVX equivalents, but are
; associated with SSEx feature flags, still work.

define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
; CHECK-LABEL: monitor:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    leal (%eax), %eax
; CHECK-NEXT:    monitor
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind

define void @mwait(i32 %E, i32 %H) nounwind {
; CHECK-LABEL: mwait:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    mwait
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind

define void @sfence() nounwind {
; CHECK-LABEL: sfence:
; CHECK:       # BB#0:
; CHECK-NEXT:    sfence
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind

define void @lfence() nounwind {
; CHECK-LABEL: lfence:
; CHECK:       # BB#0:
; CHECK-NEXT:    lfence
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

define void @mfence() nounwind {
; CHECK-LABEL: mfence:
; CHECK:       # BB#0:
; CHECK-NEXT:    mfence
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

define void @clflush(i8* %p) nounwind {
; CHECK-LABEL: clflush:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    clflush (%eax)
; CHECK-NEXT:    retl
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind

; For the crc32 tests the second operand is expected to be used directly
; from its stack slot rather than being loaded into a register first.
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; CHECK-LABEL: crc32_32_8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: crc32_32_16:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: crc32_32_32:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

; The constant-pool label for the <i64 1, i64 1> addend is matched with a
; pattern rather than a literal name: the number in the label depends on the
; position of this function in the file, so a literal would break whenever a
; test is added or removed earlier in the file.
define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; CHECK-LABEL: movnt_dq:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind

define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
; CHECK-LABEL: movnt_ps:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovntps %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind

define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; add operation forces the execution domain.
; CHECK-LABEL: movnt_pd:
; CHECK:       # BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovntpd %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


; Check for pclmulqdq
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_pclmulqdq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone