; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
; RUN: llc -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX

; Verify that we're folding the load into the math instruction.
; This pattern is generated out of the simplest intrinsics usage:
;    _mm_add_ss(a, _mm_load_ss(b));
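;
; A C-level sketch of that usage (illustration only, not part of the test;
; the wrapper function name is hypothetical):
;
;   #include <xmmintrin.h>
;
;   /* Load a scalar float, add it to the low lane of 'a', keep upper lanes. */
;   __m128 add_low_lane(__m128 a, const float *b) {
;     return _mm_add_ss(a, _mm_load_ss(b));
;   }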

define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # %bb.0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # %bb.0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # %bb.0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}