; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Vector multiply-accumulate selection with floating-point contraction enabled
; (-fp-contract=fast on the command line above).
;
; In every function of this first group the fmul result has exactly one use,
; an fadd or an fsub, and the pair is expected to be emitted as a single
; fmla (for fadd) or fmls (for fsub) of the matching vector arrangement.
;
; Each function is anchored with a label directive so that an instruction
; pattern can only match inside the function it was written for; without the
; anchor a pattern may silently match the same instruction in a later function.

; C + A * B  ->  fmla

define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmla2xfloat:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = fmul <2 x float> %A, %B
  %tmp2 = fadd <2 x float> %C, %tmp1
  ret <2 x float> %tmp2
}

define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmla4xfloat:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %tmp1 = fmul <4 x float> %A, %B
  %tmp2 = fadd <4 x float> %C, %tmp1
  ret <4 x float> %tmp2
}

define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmla2xdouble:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp1 = fmul <2 x double> %A, %B
  %tmp2 = fadd <2 x double> %C, %tmp1
  ret <2 x double> %tmp2
}

; C - A * B  ->  fmls

define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmls2xfloat:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = fmul <2 x float> %A, %B
  %tmp2 = fsub <2 x float> %C, %tmp1
  ret <2 x float> %tmp2
}

define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmls4xfloat:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %tmp1 = fmul <4 x float> %A, %B
  %tmp2 = fsub <4 x float> %C, %tmp1
  ret <4 x float> %tmp2
}

define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmls2xdouble:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp1 = fmul <2 x double> %A, %B
  %tmp2 = fsub <2 x double> %C, %tmp1
  ret <2 x double> %tmp2
}


; Another set of tests for when the intrinsic is used.

; Every function below is anchored with a label directive so that its
; instruction pattern (positive or negative) is evaluated only against the
; assembly emitted for that function.

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

; llvm.fma(A, B, C)  ->  fmla

define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmla2xfloat_fused:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %val
}

define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmla4xfloat_fused:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %val
}

define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmla2xdouble_fused:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %val
}

; llvm.fma(-A, B, C)  ->  fmls
; The first multiplicand is negated by subtracting it from a -0.0 splat before
; it is passed to the intrinsic.

define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmls2xfloat_fused:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %negA = fsub <2 x float> <float -0.0, float -0.0>, %A
  %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %val
}

define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmls4xfloat_fused:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
  %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %val
}

define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmls2xdouble_fused:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %negA = fsub <2 x double> <double -0.0, double -0.0>, %A
  %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %val
}

declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)

; llvm.fmuladd(A, B, C)  ->  fmla

define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmuladd2xfloat:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  ret <2 x float> %val
}

define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; CHECK-LABEL: fmuladd4xfloat_fused:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  ret <4 x float> %val
}

define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmuladd2xdouble_fused:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  ret <2 x double> %val
}


; Another set of tests that check the single-use requirement on the multiply:
; here the fmul result (%tmp1) has two users, and the negative patterns assert
; that no fmla/fmls of the given arrangement is emitted for the function.

define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
; CHECK-LABEL: fmla2xfloati_su:
; CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = fmul <2 x float> %A, %B
  %tmp2 = fadd <2 x float> %C, %tmp1
  %tmp3 = fadd <2 x float> %tmp2, %tmp1
  ret <2 x float> %tmp3
}

define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: fmls2xdouble_su:
; CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp1 = fmul <2 x double> %A, %B
  %tmp2 = fsub <2 x double> %C, %tmp1
  %tmp3 = fsub <2 x double> %tmp2, %tmp1
  ret <2 x double> %tmp3
}
