; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s

; Verify that we're folding the load into the math instruction.
;
; Every test below follows the same shape: load a scalar through the pointer
; argument, insert it into element 0 of an otherwise-undef vector, call the
; scalar SSE/SSE2 intrinsic on that vector, and return element 0 of the result.
; The expected assembly uses the pointer argument register directly as a
; memory operand "(%rdi)" of the VEX-encoded instruction.

; FIXME: The folding should also happen without the avx attribute;
; i.e., when generating SSE (non-VEX-prefixed) instructions.

; llvm.x86.sse.rcp.ss on a float loaded from %a.
define float @rcpss(float* %a) {
; CHECK-LABEL: rcpss:
; CHECK: vrcpss (%rdi), %xmm0, %xmm0

  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; llvm.x86.sse.rsqrt.ss on a float loaded from %a.
define float @rsqrtss(float* %a) {
; CHECK-LABEL: rsqrtss:
; CHECK: vrsqrtss (%rdi), %xmm0, %xmm0

  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; llvm.x86.sse.sqrt.ss on a float loaded from %a.
define float @sqrtss(float* %a) {
; CHECK-LABEL: sqrtss:
; CHECK: vsqrtss (%rdi), %xmm0, %xmm0

  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; llvm.x86.sse2.sqrt.sd on a double loaded from %a (the <2 x double> variant
; of the pattern above).
define double @sqrtsd(double* %a) {
; CHECK-LABEL: sqrtsd:
; CHECK: vsqrtsd (%rdi), %xmm0, %xmm0

  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}


; Declarations of the scalar intrinsics exercised above.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
