; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+v8.2a,+fullfp16 -fp-contract=fast | FileCheck %s

define half @test_FMULADDH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULADDH_OP1:
; CHECK: fmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %add = fadd fast half %mul, %a
  ret half %add
}

define half @test_FMULADDH_OP2(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULADDH_OP2:
; CHECK: fmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %add = fadd fast half %a, %mul
  ret half %add
}

define half @test_FMULSUBH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULSUBH_OP1:
; CHECK: fnmsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %sub = fsub fast half %mul, %a
  ret half %sub
}

define half @test_FMULSUBH_OP2(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULSUBH_OP2:
; CHECK: fmsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %add = fsub fast half %a, %mul
  ret half %add
}

define half @test_FNMULSUBH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FNMULSUBH_OP1:
; CHECK: fnmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %neg = fsub fast half -0.0, %mul
  %add = fsub fast half %neg, %a
  ret half %add
}

define <4 x half> @test_FMLAv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLAv4f16_OP1:
; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %add = fadd fast <4 x half> %mul, %a
  ret <4 x half> %add
}

define <4 x half> @test_FMLAv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLAv4f16_OP2:
; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %add = fadd fast <4 x half> %a, %mul
  ret <4 x half> %add
}

define <8 x half> @test_FMLAv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLAv8f16_OP1:
; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %add = fadd fast <8 x half> %mul, %a
  ret <8 x half> %add
}

define <8 x half> @test_FMLAv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLAv8f16_OP2:
; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %add = fadd fast <8 x half> %a, %mul
  ret <8 x half> %add
}

define <4 x half> @test_FMLAv4i16_indexed_OP1(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP1:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %add = fadd fast <4 x half> %m, %a
  ret <4 x half> %add
}

define <4 x half> @test_FMLAv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %add = fadd fast <4 x half> %a, %m
  ret <4 x half> %add
}

define <8 x half> @test_FMLAv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP1:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %add = fadd fast <8 x half> %m, %a
  ret <8 x half> %add
}

define <8 x half> @test_FMLAv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %add = fadd fast <8 x half> %a, %m
  ret <8 x half> %add
}

define <4 x half> @test_FMLSv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLSv4f16_OP1:
; CHECK: fneg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %sub = fsub fast <4 x half> %mul, %a
  ret <4 x half> %sub
}

define <4 x half> @test_FMLSv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLSv4f16_OP2:
; CHECK: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %sub = fsub fast <4 x half> %a, %mul
  ret <4 x half> %sub
}

define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLSv8f16_OP1:
; CHECK: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %sub = fsub fast <8 x half> %mul, %a
  ret <8 x half> %sub
}

define <8 x half> @test_FMLSv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLSv8f16_OP2:
; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %sub = fsub fast <8 x half> %a, %mul
  ret <8 x half> %sub
}

define <4 x half> @test_FMLSv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLSv4i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %sub = fsub fast <4 x half> %a, %m
  ret <4 x half> %sub
}

define <8 x half> @test_FMLSv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP1:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %sub = fsub fast <8 x half> %m, %a
  ret <8 x half> %sub
}

define <8 x half> @test_FMLSv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %sub = fsub fast <8 x half> %a, %m
  ret <8 x half> %sub
}