; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16  | FileCheck %s

; AArch64 NEON FMULX intrinsics used by the tests in this file, in scalar half,
; <4 x half> and <8 x half> forms.
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
; Generic fused multiply-add intrinsics for the same three half-precision shapes.
; llvm.fma(x, y, z) computes x * y + z.
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
; NOTE(review): attribute group #1 has no definition in the visible part of this
; file - confirm that is intentional.
declare half @llvm.fma.f16(half, half, half) #1

; <4 x half> FMA by element, multiplier taken from a 64-bit vector.
; Lane 0 of %c is splatted (all-zero shuffle mask) and passed as the second
; multiplicand of llvm.fma(%b, splat, %a). The expected code is a single indexed
; "fmla v0.4h, v1.4h, v2.h[0]" with no separate dup. The kill line is llc's
; annotation that d2 is treated as part of q2. %lane is unused.
define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; <8 x half> FMA by element, multiplier taken from a 64-bit vector.
; Lane 0 of the <4 x half> %c is splatted to eight lanes and passed as the
; second multiplicand of llvm.fma(%b, splat, %a). The expected code is a single
; indexed "fmla v0.8h, v1.8h, v2.h[0]". The kill line is llc's annotation that
; d2 is treated as part of q2. %lane is unused.
define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; <4 x half> FMA by element, multiplier taken from a 128-bit vector.
; Lane 0 of the <8 x half> %c is splatted to four lanes. Here the splat is the
; FIRST multiplicand of llvm.fma(splat, %b, %a), so the test also covers the
; commuted operand order. The expected code is still a single indexed
; "fmla v0.4h, v1.4h, v2.h[0]". %lane is unused.
define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a)
  ret <4 x half> %0
}

; <8 x half> FMA by element, multiplier taken from a 128-bit vector.
; Lane 0 of %c is splatted and used as the FIRST multiplicand of
; llvm.fma(splat, %b, %a) (commuted operand order). The expected code is a
; single indexed "fmla v0.8h, v1.8h, v2.h[0]". %lane is unused.
define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a)
  ret <8 x half> %0
}

; <4 x half> FMA with a scalar multiplier.
; The scalar %c is inserted into element 0 of an undef vector and splatted, then
; used in llvm.fma(%b, splat, %a). The expected code folds the splat into an
; indexed "fmla v0.4h, v1.4h, v2.h[0]". The kill line is llc's annotation that
; h2 is treated as part of q2.
define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

; <8 x half> FMA with a scalar multiplier.
; The scalar %c is inserted into element 0 of an undef vector and splatted to
; eight lanes, then used in llvm.fma(%b, splat, %a). The expected code folds the
; splat into an indexed "fmla v0.8h, v1.8h, v2.h[0]". The kill line is llc's
; annotation that h2 is treated as part of q2.
define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar half FMA whose multiplier is lane 0 of a 64-bit vector.
; extractelement feeds llvm.fma.f16(%b, lane, %a). The expected code is the
; scalar by-element form "fmla h0, h1, v2.h[0]" with no separate lane move.
; The kill line is llc's annotation that d2 is treated as part of q2.
; %lane is unused.
define dso_local half @t_vfmah_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half FMA whose multiplier is lane 0 of a 128-bit vector.
; extractelement feeds llvm.fma.f16(%b, lane, %a). The expected code is the
; scalar by-element form "fmla h0, h1, v2.h[0]". %lane is unused.
define dso_local half @t_vfmah_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; <4 x half> fused multiply-subtract by element, 64-bit lane source.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0), lane 0 of %c is splatted,
; and both feed llvm.fma(-%b, splat, %a). The expected code folds the negation
; and the splat into a single "fmls v0.4h, v1.4h, v2.h[0]". The kill line is
; llc's annotation that d2 is treated as part of q2. %lane is unused.
define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; <8 x half> fused multiply-subtract by element, 64-bit lane source.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0), lane 0 of the <4 x half>
; %c is splatted to eight lanes, and both feed llvm.fma(-%b, splat, %a). The
; expected code is a single "fmls v0.8h, v1.8h, v2.h[0]". The kill line is
; llc's annotation that d2 is treated as part of q2. %lane is unused.
define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; <4 x half> fused multiply-subtract by element, 128-bit lane source.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0) and lane 0 of the
; <8 x half> %c is splatted to four lanes. The splat is the FIRST multiplicand
; of llvm.fma(splat, -%b, %a) (commuted operand order). The expected code is a
; single "fmls v0.4h, v1.4h, v2.h[0]". %lane is unused.
define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a)
  ret <4 x half> %0
}

; <8 x half> fused multiply-subtract by element, 128-bit lane source.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0) and lane 0 of %c is
; splatted. The splat is the FIRST multiplicand of llvm.fma(splat, -%b, %a)
; (commuted operand order). The expected code is a single
; "fmls v0.8h, v1.8h, v2.h[0]". %lane is unused.
define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a)
  ret <8 x half> %0
}

; <4 x half> fused multiply-subtract with a scalar multiplier.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0). The scalar %c is inserted
; into element 0 and splatted, then used in llvm.fma(-%b, splat, %a). The
; expected code is a single "fmls v0.4h, v1.4h, v2.h[0]". The kill line is
; llc's annotation that h2 is treated as part of q2.
define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

; <8 x half> fused multiply-subtract with a scalar multiplier.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0). The scalar %c is inserted
; into element 0 and splatted to eight lanes, then used in
; llvm.fma(-%b, splat, %a). The expected code is a single
; "fmls v0.8h, v1.8h, v2.h[0]". The kill line is llc's annotation that h2 is
; treated as part of q2.
define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar half fused multiply-subtract, multiplier is lane 0 of a 64-bit vector.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0) and multiplied by the
; extracted lane in llvm.fma.f16(-%b, lane, %a). The expected code is the scalar
; by-element form "fmls h0, h1, v2.h[0]". The kill line is llc's annotation
; that d2 is treated as part of q2. %lane is unused.
define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fused multiply-subtract, multiplier is lane 0 of a 128-bit vector.
; %b is negated as "-0.0 - %b" (0xH8000 is half -0.0) and multiplied by the
; extracted lane in llvm.fma.f16(-%b, lane, %a). The expected code is the scalar
; by-element form "fmls h0, h1, v2.h[0]". %lane is unused.
define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; <4 x half> multiply by element, lane taken from a 128-bit vector.
; Lane 0 of the <8 x half> %b is splatted to four lanes and is the left operand
; of the fmul with %a. The expected code is a single indexed
; "fmul v0.4h, v0.4h, v1.h[0]". %lane is unused.
define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x half> %shuffle, %a
  ret <4 x half> %mul
}

; <8 x half> multiply by element, lane taken from a 128-bit vector.
; Lane 0 of %b is splatted and is the left operand of the fmul with %a. The
; expected code is a single indexed "fmul v0.8h, v0.8h, v1.h[0]".
; %lane is unused.
define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %mul = fmul <8 x half> %shuffle, %a
  ret <8 x half> %mul
}

; Scalar half multiply by lane 0 of a 64-bit vector.
; The extracted lane is the left operand of the fmul with %a. The expected code
; is the scalar by-element form "fmul h0, h0, v1.h[0]". The kill line is llc's
; annotation that d1 is treated as part of q1. %lane is unused.
define dso_local half @t_vmulh_lane_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul h0, h0, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <4 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

; Scalar half multiply by lane 0 of a 128-bit vector.
; The extracted lane is the left operand of the fmul with %a. The expected code
; is the scalar by-element form "fmul h0, h0, v1.h[0]". %lane is unused.
define dso_local half @t_vmulh_laneq_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmul h0, h0, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <8 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

; Plain scalar FMULX: the half-precision fmulx intrinsic applied to two scalar
; arguments. The expected code is "fmulx h0, h0, h1".
define dso_local half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
  ret half %fmulx.i
}

; Scalar FMULX by lane 3 (the highest lane) of a 64-bit vector.
; The extracted element is the second operand of the scalar fmulx intrinsic.
; The expected code is the by-element form "fmulx h0, h0, v1.h[3]". The kill
; line is llc's annotation that d1 is treated as part of q1. %lane is unused.
define dso_local half @t_vmulxh_lane_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx h0, h0, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %b, i32 3
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; <4 x half> FMULX by element, lane taken from a 64-bit vector.
; Lane 0 of %b is splatted and passed as the second operand of the v4f16 fmulx
; intrinsic. The expected code is a single indexed
; "fmulx v0.4h, v0.4h, v1.h[0]". The kill line is llc's annotation that d1 is
; treated as part of q1. %lane is unused.
define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> FMULX by element, lane taken from a 64-bit vector.
; Lane 0 of the <4 x half> %b is splatted to eight lanes and passed as the
; second operand of the v8f16 fmulx intrinsic. The expected code is a single
; indexed "fmulx v0.8h, v0.8h, v1.h[0]". The kill line is llc's annotation that
; d1 is treated as part of q1. %lane is unused.
define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

; <4 x half> FMULX by element, lane taken from a 128-bit vector.
; Lane 0 of the <8 x half> %b is splatted to four lanes and passed as the second
; operand of the v4f16 fmulx intrinsic. The expected code is a single indexed
; "fmulx v0.4h, v0.4h, v1.h[0]". %lane is unused.
define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> FMULX by element, lane taken from a 128-bit vector.
; Lane 0 of %b is splatted and passed as the second operand of the v8f16 fmulx
; intrinsic. The expected code is a single indexed
; "fmulx v0.8h, v0.8h, v1.h[0]". %lane is unused.
define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

; Scalar FMULX by lane 7 (the highest lane) of a 128-bit vector.
; The extracted element is the second operand of the scalar fmulx intrinsic.
; The expected code is the by-element form "fmulx h0, h0, v1.h[7]".
; %lane is unused.
define dso_local half @t_vmulxh_laneq_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %b, i32 7
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; <4 x half> FMULX with a scalar second operand.
; The scalar %c is inserted into element 0 of an undef vector and splatted, then
; passed to the v4f16 fmulx intrinsic. Unlike the lane tests, the recorded
; output keeps the splat as an explicit "dup v1.4h, v1.h[0]" followed by a
; full-vector "fmulx v0.4h, v0.4h, v1.4h". The kill line is llc's annotation
; that h1 is treated as part of q1.
define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.4h, v1.h[0]
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> FMULX with a scalar second operand.
; The scalar %c is inserted into element 0 of an undef vector and splatted to
; eight lanes, then passed to the v8f16 fmulx intrinsic. Unlike the lane tests,
; the recorded output keeps the splat as an explicit "dup v1.8h, v1.h[0]"
; followed by a full-vector "fmulx v0.8h, v0.8h, v1.8h". The kill line is llc's
; annotation that h1 is treated as part of q1.
define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.8h, v1.h[0]
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4
  ret <8 x half> %vmulx2.i
}

; Scalar half FMA whose multiplier is lane 3 (the highest lane) of a 64-bit
; vector, covering a non-zero lane index. extractelement feeds
; llvm.fma.f16(%b, lane, %a). The expected code is "fmla h0, h1, v2.h[3]".
; The kill line is llc's annotation that d2 is treated as part of q2.
define dso_local half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane3_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half FMA whose multiplier is lane 7 (the highest lane) of a 128-bit
; vector, covering a non-zero lane index. extractelement feeds
; llvm.fma.f16(%b, lane, %a). The expected code is "fmla h0, h1, v2.h[7]".
define dso_local half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmah_laneq7_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 7
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fused multiply-subtract whose multiplier is lane 3 (the highest
; lane) of a 64-bit vector. %b is negated as "-0.0 - %b" (0xH8000 is half -0.0)
; and used in llvm.fma.f16(-%b, lane, %a). The expected code is
; "fmls h0, h1, v2.h[3]". The kill line is llc's annotation that d2 is treated
; as part of q2.
define dso_local half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmsh_lane3_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fused multiply-subtract whose multiplier is lane 7 (the highest
; lane) of a 128-bit vector. %b is negated as "-0.0 - %b" (0xH8000 is half -0.0)
; and used in llvm.fma.f16(-%b, lane, %a). The expected code is
; "fmls h0, h1, v2.h[7]".
define dso_local half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmsh_laneq7_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 7
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half FMA whose multiplier is lane 3 of a computed vector rather than of
; an incoming argument. %c and %d are added as <4 x half>, lane 3 of the sum is
; extracted, and it feeds llvm.fma.f16(%b, lane, %a). The expected code is a
; vector "fadd v2.4h, v2.4h, v3.4h" followed directly by the by-element
; "fmla h0, h1, v2.h[3]", with no separate lane extraction in between.
define dso_local half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: t_fadd_vfmah_f16:
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fadd <4 x half> %c, %d
  %extract = extractelement <4 x half> %0, i32 3
  %1 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %1
}
455