; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+v8.2a,+fullfp16 -fp-contract=fast  | FileCheck %s
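; Check that scalar and vector half-precision fmul/fadd/fsub sequences are
; combined into fused multiply-add/subtract instructions (fmadd, fmsub,
; fnmadd, fnmsub, fmla, fmls) when +fullfp16 is available.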

define half @test_FMULADDH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULADDH_OP1:
; CHECK: fmadd    {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %add = fadd fast half %mul, %a
  ret half %add
}

define half @test_FMULADDH_OP2(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULADDH_OP2:
; CHECK: fmadd    {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %add = fadd fast half %a, %mul
  ret half %add
}

define half @test_FMULSUBH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULSUBH_OP1:
; CHECK: fnmsub    {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %sub = fsub fast half %mul, %a
  ret half %sub
}

define half @test_FMULSUBH_OP2(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULSUBH_OP2:
; CHECK: fmsub    {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %add = fsub fast half %a, %mul
  ret half %add
}

define half @test_FNMULSUBH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FNMULSUBH_OP1:
; CHECK: fnmadd    {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %mul = fmul fast half %c, %b
  %neg = fsub fast half -0.0, %mul
  %add = fsub fast half %neg, %a
  ret half %add
}

define <4 x half> @test_FMLAv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLAv4f16_OP1:
; CHECK: fmla    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %add = fadd fast <4 x half> %mul, %a
  ret <4 x half> %add
}

define <4 x half> @test_FMLAv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLAv4f16_OP2:
; CHECK: fmla    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %add = fadd fast <4 x half> %a, %mul
  ret <4 x half> %add
}

define <8 x half> @test_FMLAv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLAv8f16_OP1:
; CHECK: fmla    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %add = fadd fast <8 x half> %mul, %a
  ret <8 x half> %add
}

define <8 x half> @test_FMLAv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLAv8f16_OP2:
; CHECK: fmla    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %add = fadd fast <8 x half> %a, %mul
  ret <8 x half> %add
}

define <4 x half> @test_FMLAv4i16_indexed_OP1(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP1:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %add = fadd fast <4 x half> %m, %a
  ret <4 x half> %add
}

define <4 x half> @test_FMLAv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %add = fadd fast <4 x half> %a, %m
  ret <4 x half> %add
}

define <8 x half> @test_FMLAv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP1:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %add = fadd fast <8 x half> %m, %a
  ret <8 x half> %add
}

define <8 x half> @test_FMLAv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %add = fadd fast <8 x half> %a, %m
  ret <8 x half> %add
}

define <4 x half> @test_FMLSv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLSv4f16_OP1:
; CHECK: fneg    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: fmla    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %sub = fsub fast <4 x half> %mul, %a
  ret <4 x half> %sub
}

define <4 x half> @test_FMLSv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLSv4f16_OP2:
; CHECK: fmls    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = fmul fast <4 x half> %c, %b
  %sub = fsub fast <4 x half> %a, %mul
  ret <4 x half> %sub
}

define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLSv8f16_OP1:
; CHECK: fneg    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: fmla    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %sub = fsub fast <8 x half> %mul, %a
  ret <8 x half> %sub
}

define <8 x half> @test_FMLSv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLSv8f16_OP2:
; CHECK: fmls    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = fmul fast <8 x half> %c, %b
  %sub = fsub fast <8 x half> %a, %mul
  ret <8 x half> %sub
}

define <4 x half> @test_FMLSv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLSv4i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmls    {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %sub = fsub fast <4 x half> %a, %m
  ret <4 x half> %sub
}

define <8 x half> @test_FMLSv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP1:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instructions:
; CHECK-FIXME: fneg    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK-FIXME: fmla    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %sub = fsub fast <8 x half> %m, %a
  ret <8 x half> %sub
}

define <8 x half> @test_FMLSv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP2:
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmls    {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %sub = fsub fast <8 x half> %a, %m
  ret <8 x half> %sub
}