; Lowering of half (f16) operations on ARM, compiled three ways:
;   - with +vfp3,+fp16 (prefixes FP16, VFP and ALL are active),
;   - with the default features of the triple below (prefixes LIBCALL, VFP,
;     ALL and LIBCALL-VFP are active),
;   - with -fpregs (prefixes LIBCALL, NOVFP and ALL are active).
1; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FP16  --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
2; RUN: llc -asm-verbose=false < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
3; RUN: llc -asm-verbose=false < %s -mattr=-fpregs | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
4
5target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
6target triple = "armv7---eabihf"
7
; fadd on half.  Expected shape: both operands are widened to float
; (vcvtb.f32.f16 with +fp16, __aeabi_h2f otherwise), added as float
; (vadd.f32, or __aeabi_fadd without FP registers), and the sum is narrowed
; back to half (vcvtb.f16.f32 or __aeabi_f2h).
8; CHECK-ALL-LABEL: test_fadd:
9; CHECK-FP16: vcvtb.f32.f16
10; CHECK-FP16: vcvtb.f32.f16
11; CHECK-LIBCALL: bl __aeabi_h2f
12; CHECK-LIBCALL: bl __aeabi_h2f
13; CHECK-VFP: vadd.f32
14; CHECK-NOVFP: bl __aeabi_fadd
15; CHECK-FP16: vcvtb.f16.f32
16; CHECK-LIBCALL: bl __aeabi_f2h
17define void @test_fadd(half* %p, half* %q) #0 {
18  %a = load half, half* %p, align 2
19  %b = load half, half* %q, align 2
20  %r = fadd half %a, %b
21  store half %r, half* %p
22  ret void
23}
24
25; CHECK-ALL-LABEL: test_fsub:
26; CHECK-FP16: vcvtb.f32.f16
27; CHECK-FP16: vcvtb.f32.f16
28; CHECK-LIBCALL: bl __aeabi_h2f
29; CHECK-LIBCALL: bl __aeabi_h2f
30; CHECK-VFP: vsub.f32
31; CHECK-NOVFP: bl __aeabi_fsub
32; CHECK-FP16: vcvtb.f16.f32
33; CHECK-LIBCALL: bl __aeabi_f2h
34define void @test_fsub(half* %p, half* %q) #0 {
35  %a = load half, half* %p, align 2
36  %b = load half, half* %q, align 2
37  %r = fsub half %a, %b
38  store half %r, half* %p
39  ret void
40}
41
42; CHECK-ALL-LABEL: test_fmul:
43; CHECK-FP16: vcvtb.f32.f16
44; CHECK-FP16: vcvtb.f32.f16
45; CHECK-LIBCALL: bl __aeabi_h2f
46; CHECK-LIBCALL: bl __aeabi_h2f
47; CHECK-VFP: vmul.f32
48; CHECK-NOVFP: bl __aeabi_fmul
49; CHECK-FP16: vcvtb.f16.f32
50; CHECK-LIBCALL: bl __aeabi_f2h
51define void @test_fmul(half* %p, half* %q) #0 {
52  %a = load half, half* %p, align 2
53  %b = load half, half* %q, align 2
54  %r = fmul half %a, %b
55  store half %r, half* %p
56  ret void
57}
58
59; CHECK-ALL-LABEL: test_fdiv:
60; CHECK-FP16: vcvtb.f32.f16
61; CHECK-FP16: vcvtb.f32.f16
62; CHECK-LIBCALL: bl __aeabi_h2f
63; CHECK-LIBCALL: bl __aeabi_h2f
64; CHECK-VFP: vdiv.f32
65; CHECK-NOVFP: bl __aeabi_fdiv
66; CHECK-FP16: vcvtb.f16.f32
67; CHECK-LIBCALL: bl __aeabi_f2h
68define void @test_fdiv(half* %p, half* %q) #0 {
69  %a = load half, half* %p, align 2
70  %b = load half, half* %q, align 2
71  %r = fdiv half %a, %b
72  store half %r, half* %p
73  ret void
74}
75
; frem on half.  The two libcall configurations are expected to call fmodf
; between the widening and narrowing conversions; the +fp16 run only checks
; the conversions themselves.
76; CHECK-ALL-LABEL: test_frem:
77; CHECK-FP16: vcvtb.f32.f16
78; CHECK-FP16: vcvtb.f32.f16
79; CHECK-LIBCALL: bl __aeabi_h2f
80; CHECK-LIBCALL: bl __aeabi_h2f
81; CHECK-LIBCALL: bl fmodf
82; CHECK-FP16: vcvtb.f16.f32
83; CHECK-LIBCALL: bl __aeabi_f2h
84define void @test_frem(half* %p, half* %q) #0 {
85  %a = load half, half* %p, align 2
86  %b = load half, half* %q, align 2
87  %r = frem half %a, %b
88  store half %r, half* %p
89  ret void
90}
91
; Copying a half from %p to %q involves no arithmetic, so every configuration
; is expected to use a plain 16-bit integer load/store pair (ldrh/strh).
92; CHECK-ALL-LABEL: test_load_store:
93; CHECK-ALL-NEXT: .fnstart
94; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
95; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
96define void @test_load_store(half* %p, half* %q) #0 {
97  %a = load half, half* %p, align 2
98  store half %a, half* %q
99  ret void
100}
101
102; Testing only successful compilation of function calls.  In ARM ABI, half
103; args and returns are handled as f32.
104
105declare half @test_callee(half %a, half %b) #0
106
107; CHECK-ALL-LABEL: test_call:
108; CHECK-ALL-NEXT: .fnstart
109; CHECK-ALL-NEXT: .save {r11, lr}
110; CHECK-ALL-NEXT: push {r11, lr}
111; CHECK-ALL-NEXT: bl test_callee
112; CHECK-ALL-NEXT: pop {r11, pc}
113define half @test_call(half %a, half %b) #0 {
114  %r = call half @test_callee(half %a, half %b)
115  ret half %r
116}
117
118; CHECK-ALL-LABEL: test_call_flipped:
119; CHECK-ALL-NEXT: .fnstart
120; CHECK-ALL-NEXT: .save {r11, lr}
121; CHECK-ALL-NEXT: push {r11, lr}
122; CHECK-VFP-NEXT: vmov.f32 s2, s0
123; CHECK-VFP-NEXT: vmov.f32 s0, s1
124; CHECK-VFP-NEXT: vmov.f32 s1, s2
125; CHECK-NOVFP-NEXT: mov r2, r0
126; CHECK-NOVFP-NEXT: mov r0, r1
127; CHECK-NOVFP-NEXT: mov r1, r2
128; CHECK-ALL-NEXT: bl test_callee
129; CHECK-ALL-NEXT: pop {r11, pc}
130define half @test_call_flipped(half %a, half %b) #0 {
131  %r = call half @test_callee(half %b, half %a)
132  ret half %r
133}
134
135; CHECK-ALL-LABEL: test_tailcall_flipped:
136; CHECK-ALL-NEXT: .fnstart
137; CHECK-VFP-NEXT: vmov.f32 s2, s0
138; CHECK-VFP-NEXT: vmov.f32 s0, s1
139; CHECK-VFP-NEXT: vmov.f32 s1, s2
140; CHECK-NOVFP-NEXT: mov r2, r0
141; CHECK-NOVFP-NEXT: mov r0, r1
142; CHECK-NOVFP-NEXT: mov r1, r2
143; CHECK-ALL-NEXT: b test_callee
144define half @test_tailcall_flipped(half %a, half %b) #0 {
145  %r = tail call half @test_callee(half %b, half %a)
146  ret half %r
147}
148
149; Optimizer picks %p or %q based on %c and only loads that value
150; No conversion is needed
151; CHECK-ALL-LABEL: test_select:
152; CHECK-ALL: cmp {{r[0-9]+}}, #0
153; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}}
154; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
155; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
156define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
157  %a = load half, half* %p, align 2
158  %b = load half, half* %q, align 2
159  %r = select i1 %c, half %a, half %b
160  store half %r, half* %p
161  ret void
162}
163
164; Test only two variants of fcmp.  These get translated to f32 vcmp
165; instructions anyway.
166; CHECK-ALL-LABEL: test_fcmp_une:
167; CHECK-FP16: vcvtb.f32.f16
168; CHECK-FP16: vcvtb.f32.f16
169; CHECK-LIBCALL: bl __aeabi_h2f
170; CHECK-LIBCALL: bl __aeabi_h2f
171; CHECK-VFP: vcmp.f32
172; CHECK-NOVFP: bl __aeabi_fcmpeq
173; CHECK-VFP-NEXT: vmrs APSR_nzcv, fpscr
174; CHECK-VFP-NEXT: movwne
175; CHECK-NOVFP-NEXT: clz r0, r0
176; CHECK-NOVFP-NEXT: lsr r0, r0, #5
177define i1 @test_fcmp_une(half* %p, half* %q) #0 {
178  %a = load half, half* %p, align 2
179  %b = load half, half* %q, align 2
180  %r = fcmp une half %a, %b
181  ret i1 %r
182}
183
184; CHECK-ALL-LABEL: test_fcmp_ueq:
185; CHECK-FP16: vcvtb.f32.f16
186; CHECK-FP16: vcvtb.f32.f16
187; CHECK-LIBCALL: bl __aeabi_h2f
188; CHECK-LIBCALL: bl __aeabi_h2f
189; CHECK-VFP: vcmp.f32
190; CHECK-NOVFP: bl __aeabi_fcmpeq
191; CHECK-FP16: vmrs APSR_nzcv, fpscr
192; CHECK-LIBCALL: movw{{ne|eq}}
193define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
194  %a = load half, half* %p, align 2
195  %b = load half, half* %q, align 2
196  %r = fcmp ueq half %a, %b
197  ret i1 %r
198}
199
200; CHECK-ALL-LABEL: test_br_cc:
201; CHECK-FP16: vcvtb.f32.f16
202; CHECK-FP16: vcvtb.f32.f16
203; CHECK-LIBCALL: bl __aeabi_h2f
204; CHECK-LIBCALL: bl __aeabi_h2f
205; CHECK-VFP: vcmp.f32
206; CHECK-NOVFP: bl __aeabi_fcmplt
207; CHECK-FP16: vmrs APSR_nzcv, fpscr
208; CHECK-VFP: strmi
209; CHECK-VFP: strpl
210; CHECK-NOVFP: strne
211; CHECK-NOVFP: streq
212define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
213  %a = load half, half* %p, align 2
214  %b = load half, half* %q, align 2
215  %c = fcmp uge half %a, %b
216  br i1 %c, label %then, label %else
217then:
218  store i32 0, i32* %p1
219  ret void
220else:
221  store i32 0, i32* %p2
222  ret void
223}
224
225declare i1 @test_dummy(half* %p) #0
; A half value carried around a loop by a phi.  With FP registers the checks
; expect one half->float conversion before the loop label, one inside the
; loop, and a float->half conversion after the back-edge; without FP
; registers only the loop structure (label, call, back-edge) is checked.
; The loop-label pattern accepts every decimal digit, so it keeps matching
; when the function or block number contains a 0.
226; CHECK-ALL-LABEL: test_phi:
227; CHECK-FP16: vcvtb.f32.f16
228; CHECK-FP16: [[LOOP:.LBB[0-9_]+]]:
229; CHECK-FP16: vcvtb.f32.f16
230; CHECK-FP16: bl      test_dummy
231; CHECK-FP16: bne     [[LOOP]]
232; CHECK-FP16: vcvtb.f16.f32
233; CHECK-LIBCALL-VFP: bl __aeabi_h2f
234; CHECK-LIBCALL: [[LOOP:.LBB[0-9_]+]]:
235; CHECK-LIBCALL-VFP: bl __aeabi_h2f
236; CHECK-LIBCALL: bl test_dummy
237; CHECK-LIBCALL: bne     [[LOOP]]
238; CHECK-LIBCALL-VFP: bl __aeabi_f2h
239define void @test_phi(half* %p) #0 {
240entry:
241  %a = load half, half* %p
242  br label %loop
243loop:
244  %r = phi half [%a, %entry], [%b, %loop]
245  %b = load half, half* %p
246  %c = call i1 @test_dummy(half* %p)
247  br i1 %c, label %loop, label %return
248return:
249  store half %r, half* %p
250  ret void
251}
252
253; CHECK-ALL-LABEL: test_fptosi_i32:
254; CHECK-FP16: vcvtb.f32.f16
255; CHECK-LIBCALL: bl __aeabi_h2f
256; CHECK-VFP: vcvt.s32.f32
257; CHECK-NOVFP: bl __aeabi_f2iz
258define i32 @test_fptosi_i32(half* %p) #0 {
259  %a = load half, half* %p, align 2
260  %r = fptosi half %a to i32
261  ret i32 %r
262}
263
; half -> signed i64.  After widening the half to float, every configuration
; is expected to call the __aeabi_f2lz helper for the 64-bit conversion.
264; CHECK-ALL-LABEL: test_fptosi_i64:
265; CHECK-FP16: vcvtb.f32.f16
266; CHECK-LIBCALL: bl __aeabi_h2f
267; CHECK-ALL: bl __aeabi_f2lz
268define i64 @test_fptosi_i64(half* %p) #0 {
269  %a = load half, half* %p, align 2
270  %r = fptosi half %a to i64
271  ret i64 %r
272}
273
274; CHECK-ALL-LABEL: test_fptoui_i32:
275; CHECK-FP16: vcvtb.f32.f16
276; CHECK-LIBCALL: bl __aeabi_h2f
277; CHECK-VFP: vcvt.u32.f32
278; CHECK-NOVFP: bl __aeabi_f2uiz
279define i32 @test_fptoui_i32(half* %p) #0 {
280  %a = load half, half* %p, align 2
281  %r = fptoui half %a to i32
282  ret i32 %r
283}
284
285; CHECK-ALL-LABEL: test_fptoui_i64:
286; CHECK-FP16: vcvtb.f32.f16
287; CHECK-LIBCALL: bl __aeabi_h2f
288; CHECK-ALL: bl __aeabi_f2ulz
289define i64 @test_fptoui_i64(half* %p) #0 {
290  %a = load half, half* %p, align 2
291  %r = fptoui half %a to i64
292  ret i64 %r
293}
294
295; CHECK-ALL-LABEL: test_sitofp_i32:
296; CHECK-VFP: vcvt.f32.s32
297; CHECK-NOVFP: bl __aeabi_i2f
298; CHECK-FP16: vcvtb.f16.f32
299; CHECK-LIBCALL: bl __aeabi_f2h
300define void @test_sitofp_i32(i32 %a, half* %p) #0 {
301  %r = sitofp i32 %a to half
302  store half %r, half* %p
303  ret void
304}
305
306; CHECK-ALL-LABEL: test_uitofp_i32:
307; CHECK-VFP: vcvt.f32.u32
308; CHECK-NOVFP: bl __aeabi_ui2f
309; CHECK-FP16: vcvtb.f16.f32
310; CHECK-LIBCALL: bl __aeabi_f2h
311define void @test_uitofp_i32(i32 %a, half* %p) #0 {
312  %r = uitofp i32 %a to half
313  store half %r, half* %p
314  ret void
315}
316
; signed i64 -> half.  Every configuration is expected to call __aeabi_l2f
; to obtain a float first, which is then narrowed to half.
317; CHECK-ALL-LABEL: test_sitofp_i64:
318; CHECK-ALL: bl __aeabi_l2f
319; CHECK-FP16: vcvtb.f16.f32
320; CHECK-LIBCALL: bl __aeabi_f2h
321define void @test_sitofp_i64(i64 %a, half* %p) #0 {
322  %r = sitofp i64 %a to half
323  store half %r, half* %p
324  ret void
325}
326
327; CHECK-ALL-LABEL: test_uitofp_i64:
328; CHECK-ALL: bl __aeabi_ul2f
329; CHECK-FP16: vcvtb.f16.f32
330; CHECK-LIBCALL: bl __aeabi_f2h
331define void @test_uitofp_i64(i64 %a, half* %p) #0 {
332  %r = uitofp i64 %a to half
333  store half %r, half* %p
334  ret void
335}
336
337; CHECK-FP16-LABEL: test_fptrunc_float:
338; CHECK-FP16: vcvtb.f16.f32
339; CHECK-LIBCALL-LABEL: test_fptrunc_float:
340; CHECK-LIBCALL: bl __aeabi_f2h
341define void @test_fptrunc_float(float %f, half* %p) #0 {
342  %a = fptrunc float %f to half
343  store half %a, half* %p
344  ret void
345}
346
; double -> half truncation.  Both the +fp16 run and the libcall runs are
; expected to call __aeabi_d2h; no inline conversion is checked for.
347; CHECK-FP16-LABEL: test_fptrunc_double:
348; CHECK-FP16: bl __aeabi_d2h
349; CHECK-LIBCALL-LABEL: test_fptrunc_double:
350; CHECK-LIBCALL: bl __aeabi_d2h
351define void @test_fptrunc_double(double %d, half* %p) #0 {
352  %a = fptrunc double %d to half
353  store half %a, half* %p
354  ret void
355}
356
357; CHECK-FP16-LABEL: test_fpextend_float:
358; CHECK-FP16: vcvtb.f32.f16
359; CHECK-LIBCALL-LABEL: test_fpextend_float:
360; CHECK-LIBCALL: bl __aeabi_h2f
361define float @test_fpextend_float(half* %p) {
362  %a = load half, half* %p, align 2
363  %r = fpext half %a to float
364  ret float %r
365}
366
367; CHECK-FP16-LABEL: test_fpextend_double:
368; CHECK-FP16: vcvtb.f32.f16
369; CHECK-LIBCALL-LABEL: test_fpextend_double:
370; CHECK-LIBCALL: bl __aeabi_h2f
371; CHECK-VFP: vcvt.f64.f32
372; CHECK-NOVFP: bl __aeabi_f2d
373define double @test_fpextend_double(half* %p) {
374  %a = load half, half* %p, align 2
375  %r = fpext half %a to double
376  ret double %r
377}
378
; Reinterpreting a loaded half as i16 is expected to be a single ldrh into r0
; followed directly by the return, in every configuration.
379; CHECK-ALL-LABEL: test_bitcast_halftoi16:
380; CHECK-ALL-NEXT: .fnstart
381; CHECK-ALL-NEXT: ldrh r0, [r0]
382; CHECK-ALL-NEXT: bx lr
383define i16 @test_bitcast_halftoi16(half* %p) #0 {
384  %a = load half, half* %p, align 2
385  %r = bitcast half %a to i16
386  ret i16 %r
387}
388
; Reinterpreting an i16 argument as half and storing it is expected to be a
; single strh of r0 followed directly by the return, in every configuration.
389; CHECK-ALL-LABEL: test_bitcast_i16tohalf:
390; CHECK-ALL-NEXT: .fnstart
391; CHECK-ALL-NEXT: strh r0, [r1]
392; CHECK-ALL-NEXT: bx lr
393define void @test_bitcast_i16tohalf(i16 %a, half* %p) #0 {
394  %r = bitcast i16 %a to half
395  store half %r, half* %p
396  ret void
397}
398
399declare half @llvm.sqrt.f16(half %a) #0
400declare half @llvm.powi.f16(half %a, i32 %b) #0
401declare half @llvm.sin.f16(half %a) #0
402declare half @llvm.cos.f16(half %a) #0
403declare half @llvm.pow.f16(half %a, half %b) #0
404declare half @llvm.exp.f16(half %a) #0
405declare half @llvm.exp2.f16(half %a) #0
406declare half @llvm.log.f16(half %a) #0
407declare half @llvm.log10.f16(half %a) #0
408declare half @llvm.log2.f16(half %a) #0
409declare half @llvm.fma.f16(half %a, half %b, half %c) #0
410declare half @llvm.fabs.f16(half %a) #0
411declare half @llvm.minnum.f16(half %a, half %b) #0
412declare half @llvm.maxnum.f16(half %a, half %b) #0
413declare half @llvm.copysign.f16(half %a, half %b) #0
414declare half @llvm.floor.f16(half %a) #0
415declare half @llvm.ceil.f16(half %a) #0
416declare half @llvm.trunc.f16(half %a) #0
417declare half @llvm.rint.f16(half %a) #0
418declare half @llvm.nearbyint.f16(half %a) #0
419declare half @llvm.round.f16(half %a) #0
420declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
421
; llvm.sqrt on half.  Configurations with FP registers are expected to use
; vsqrt.f32 on the widened value; without FP registers sqrtf is called.
422; CHECK-ALL-LABEL: test_sqrt:
423; CHECK-FP16: vcvtb.f32.f16
424; CHECK-FP16: vsqrt.f32
425; CHECK-FP16: vcvtb.f16.f32
426; CHECK-LIBCALL: bl __aeabi_h2f
427; CHECK-LIBCALL-VFP: vsqrt.f32
428; CHECK-NOVFP: bl sqrtf
429; CHECK-LIBCALL: bl __aeabi_f2h
430define void @test_sqrt(half* %p) #0 {
431  %a = load half, half* %p, align 2
432  %r = call half @llvm.sqrt.f16(half %a)
433  store half %r, half* %p
434  ret void
435}
436
437; CHECK-FP16-LABEL: test_fpowi:
438; CHECK-FP16: vcvtb.f32.f16
439; CHECK-FP16: bl __powisf2
440; CHECK-FP16: vcvtb.f16.f32
441; CHECK-LIBCALL-LABEL: test_fpowi:
442; CHECK-LIBCALL: bl __aeabi_h2f
443; CHECK-LIBCALL: bl __powisf2
444; CHECK-LIBCALL: bl __aeabi_f2h
445define void @test_fpowi(half* %p, i32 %b) #0 {
446  %a = load half, half* %p, align 2
447  %r = call half @llvm.powi.f16(half %a, i32 %b)
448  store half %r, half* %p
449  ret void
450}
451
452; CHECK-FP16-LABEL: test_sin:
453; CHECK-FP16: vcvtb.f32.f16
454; CHECK-FP16: bl sinf
455; CHECK-FP16: vcvtb.f16.f32
456; CHECK-LIBCALL-LABEL: test_sin:
457; CHECK-LIBCALL: bl __aeabi_h2f
458; CHECK-LIBCALL: bl sinf
459; CHECK-LIBCALL: bl __aeabi_f2h
460define void @test_sin(half* %p) #0 {
461  %a = load half, half* %p, align 2
462  %r = call half @llvm.sin.f16(half %a)
463  store half %r, half* %p
464  ret void
465}
466
467; CHECK-FP16-LABEL: test_cos:
468; CHECK-FP16: vcvtb.f32.f16
469; CHECK-FP16: bl cosf
470; CHECK-FP16: vcvtb.f16.f32
471; CHECK-LIBCALL-LABEL: test_cos:
472; CHECK-LIBCALL: bl __aeabi_h2f
473; CHECK-LIBCALL: bl cosf
474; CHECK-LIBCALL: bl __aeabi_f2h
475define void @test_cos(half* %p) #0 {
476  %a = load half, half* %p, align 2
477  %r = call half @llvm.cos.f16(half %a)
478  store half %r, half* %p
479  ret void
480}
481
482; CHECK-FP16-LABEL: test_pow:
483; CHECK-FP16: vcvtb.f32.f16
484; CHECK-FP16: vcvtb.f32.f16
485; CHECK-FP16: bl powf
486; CHECK-FP16: vcvtb.f16.f32
487; CHECK-LIBCALL-LABEL: test_pow:
488; CHECK-LIBCALL: bl __aeabi_h2f
489; CHECK-LIBCALL: bl __aeabi_h2f
490; CHECK-LIBCALL: bl powf
491; CHECK-LIBCALL: bl __aeabi_f2h
492define void @test_pow(half* %p, half* %q) #0 {
493  %a = load half, half* %p, align 2
494  %b = load half, half* %q, align 2
495  %r = call half @llvm.pow.f16(half %a, half %b)
496  store half %r, half* %p
497  ret void
498}
499
; llvm.pow on half with a constant exponent close to 1/3.  The exponent is
; written in LLVM's double-hex notation (0x3FD5540000000000 is
; 0.333251953125).  The checks expect the call to stay a powf libcall
; surrounded by a single widening and a single narrowing conversion.
500; CHECK-FP16-LABEL: test_cbrt:
501; CHECK-FP16: vcvtb.f32.f16
502; CHECK-FP16: bl powf
503; CHECK-FP16: vcvtb.f16.f32
504; CHECK-LIBCALL-LABEL: test_cbrt:
505; CHECK-LIBCALL: bl __aeabi_h2f
506; CHECK-LIBCALL: bl powf
507; CHECK-LIBCALL: bl __aeabi_f2h
508define void @test_cbrt(half* %p) #0 {
509  %a = load half, half* %p, align 2
510  %r = call half @llvm.pow.f16(half %a, half 0x3FD5540000000000)
511  store half %r, half* %p
512  ret void
513}
514
515; CHECK-FP16-LABEL: test_exp:
516; CHECK-FP16: vcvtb.f32.f16
517; CHECK-FP16: bl expf
518; CHECK-FP16: vcvtb.f16.f32
519; CHECK-LIBCALL-LABEL: test_exp:
520; CHECK-LIBCALL: bl __aeabi_h2f
521; CHECK-LIBCALL: bl expf
522; CHECK-LIBCALL: bl __aeabi_f2h
523define void @test_exp(half* %p) #0 {
524  %a = load half, half* %p, align 2
525  %r = call half @llvm.exp.f16(half %a)
526  store half %r, half* %p
527  ret void
528}
529
530; CHECK-FP16-LABEL: test_exp2:
531; CHECK-FP16: vcvtb.f32.f16
532; CHECK-FP16: bl exp2f
533; CHECK-FP16: vcvtb.f16.f32
534; CHECK-LIBCALL-LABEL: test_exp2:
535; CHECK-LIBCALL: bl __aeabi_h2f
536; CHECK-LIBCALL: bl exp2f
537; CHECK-LIBCALL: bl __aeabi_f2h
538define void @test_exp2(half* %p) #0 {
539  %a = load half, half* %p, align 2
540  %r = call half @llvm.exp2.f16(half %a)
541  store half %r, half* %p
542  ret void
543}
544
545; CHECK-FP16-LABEL: test_log:
546; CHECK-FP16: vcvtb.f32.f16
547; CHECK-FP16: bl logf
548; CHECK-FP16: vcvtb.f16.f32
549; CHECK-LIBCALL-LABEL: test_log:
550; CHECK-LIBCALL: bl __aeabi_h2f
551; CHECK-LIBCALL: bl logf
552; CHECK-LIBCALL: bl __aeabi_f2h
553define void @test_log(half* %p) #0 {
554  %a = load half, half* %p, align 2
555  %r = call half @llvm.log.f16(half %a)
556  store half %r, half* %p
557  ret void
558}
559
560; CHECK-FP16-LABEL: test_log10:
561; CHECK-FP16: vcvtb.f32.f16
562; CHECK-FP16: bl log10f
563; CHECK-FP16: vcvtb.f16.f32
564; CHECK-LIBCALL-LABEL: test_log10:
565; CHECK-LIBCALL: bl __aeabi_h2f
566; CHECK-LIBCALL: bl log10f
567; CHECK-LIBCALL: bl __aeabi_f2h
568define void @test_log10(half* %p) #0 {
569  %a = load half, half* %p, align 2
570  %r = call half @llvm.log10.f16(half %a)
571  store half %r, half* %p
572  ret void
573}
574
575; CHECK-FP16-LABEL: test_log2:
576; CHECK-FP16: vcvtb.f32.f16
577; CHECK-FP16: bl log2f
578; CHECK-FP16: vcvtb.f16.f32
579; CHECK-LIBCALL-LABEL: test_log2:
580; CHECK-LIBCALL: bl __aeabi_h2f
581; CHECK-LIBCALL: bl log2f
582; CHECK-LIBCALL: bl __aeabi_f2h
583define void @test_log2(half* %p) #0 {
584  %a = load half, half* %p, align 2
585  %r = call half @llvm.log2.f16(half %a)
586  store half %r, half* %p
587  ret void
588}
589
590; CHECK-FP16-LABEL: test_fma:
591; CHECK-FP16: vcvtb.f32.f16
592; CHECK-FP16: vcvtb.f32.f16
593; CHECK-FP16: vcvtb.f32.f16
594; CHECK-FP16: bl fmaf
595; CHECK-FP16: vcvtb.f16.f32
596; CHECK-LIBCALL-LABEL: test_fma:
597; CHECK-LIBCALL: bl __aeabi_h2f
598; CHECK-LIBCALL: bl __aeabi_h2f
599; CHECK-LIBCALL: bl __aeabi_h2f
600; CHECK-LIBCALL: bl fmaf
601; CHECK-LIBCALL: bl __aeabi_f2h
602define void @test_fma(half* %p, half* %q, half* %r) #0 {
603  %a = load half, half* %p, align 2
604  %b = load half, half* %q, align 2
605  %c = load half, half* %r, align 2
606  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
607  store half %v, half* %p
608  ret void
609}
610
; llvm.fabs on half.  The +fp16 run expects vabs.f32 on the widened value;
; the libcall runs expect a bic between the two conversion helpers.
611; CHECK-FP16-LABEL: test_fabs:
612; CHECK-FP16: vcvtb.f32.f16
613; CHECK-FP16: vabs.f32
614; CHECK-FP16: vcvtb.f16.f32
615; CHECK-LIBCALL-LABEL: test_fabs:
616; CHECK-LIBCALL: bl __aeabi_h2f
617; CHECK-LIBCALL: bic
618; CHECK-LIBCALL: bl __aeabi_f2h
619define void @test_fabs(half* %p) {
620  %a = load half, half* %p, align 2
621  %r = call half @llvm.fabs.f16(half %a)
622  store half %r, half* %p
623  ret void
624}
625
626; CHECK-FP16-LABEL: test_minnum:
627; CHECK-FP16: vcvtb.f32.f16
628; CHECK-FP16: vcvtb.f32.f16
629; CHECK-FP16: bl fminf
630; CHECK-FP16: vcvtb.f16.f32
631; CHECK-LIBCALL-LABEL: test_minnum:
632; CHECK-LIBCALL: bl __aeabi_h2f
633; CHECK-LIBCALL: bl __aeabi_h2f
634; CHECK-LIBCALL: bl fminf
635; CHECK-LIBCALL: bl __aeabi_f2h
636define void @test_minnum(half* %p, half* %q) #0 {
637  %a = load half, half* %p, align 2
638  %b = load half, half* %q, align 2
639  %r = call half @llvm.minnum.f16(half %a, half %b)
640  store half %r, half* %p
641  ret void
642}
643
644; CHECK-FP16-LABEL: test_maxnum:
645; CHECK-FP16: vcvtb.f32.f16
646; CHECK-FP16: vcvtb.f32.f16
647; CHECK-FP16: bl fmaxf
648; CHECK-FP16: vcvtb.f16.f32
649; CHECK-LIBCALL-LABEL: test_maxnum:
650; CHECK-LIBCALL: bl __aeabi_h2f
651; CHECK-LIBCALL: bl __aeabi_h2f
652; CHECK-LIBCALL: bl fmaxf
653; CHECK-LIBCALL: bl __aeabi_f2h
654define void @test_maxnum(half* %p, half* %q) #0 {
655  %a = load half, half* %p, align 2
656  %b = load half, half* %q, align 2
657  %r = call half @llvm.maxnum.f16(half %a, half %b)
658  store half %r, half* %p
659  ret void
660}
661
; A min-like pattern written as compare + select (fcmp ult against 1.0, then
; select), not as the minnum intrinsic.  With FP registers the checks expect
; a vcmp.f32 / vmrs / vmovlt.f32 sequence; without them, __aeabi_fcmpge.
662; CHECK-ALL-LABEL: test_minimum:
663; CHECK-FP16: vmov.f32 s0, #1.000000e+00
664; CHECK-FP16: vcvtb.f32.f16
665; CHECK-LIBCALL: bl __aeabi_h2f
666; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
667; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
668; CHECK-VFP: vcmp.f32
669; CHECK-VFP: vmrs
670; CHECK-VFP: vmovlt.f32
671; CHECK-NOVFP: bl __aeabi_fcmpge
672; CHECK-FP16: vcvtb.f16.f32
673; CHECK-LIBCALL: bl __aeabi_f2h
674define void @test_minimum(half* %p) #0 {
675  %a = load half, half* %p, align 2
676  %c = fcmp ult half %a, 1.0
677  %r = select i1 %c, half %a, half 1.0
678  store half %r, half* %p
679  ret void
680}
681
; A max-like pattern written as compare + select (fcmp ugt against 1.0, then
; select), not as the maxnum intrinsic.  With FP registers the checks expect
; a vcmp.f32 / vmrs / vmovhi.f32 sequence; without them, __aeabi_fcmple.
; The libcall-with-VFP constant check matches any s-register, like the
; corresponding check in test_minimum.
682; CHECK-ALL-LABEL: test_maximum:
683; CHECK-FP16: vmov.f32 s0, #1.000000e+00
684; CHECK-FP16: vcvtb.f32.f16
685; CHECK-LIBCALL: bl __aeabi_h2f
686; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
687; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
688; CHECK-VFP: vcmp.f32
689; CHECK-VFP: vmrs
690; CHECK-VFP: vmovhi.f32
691; CHECK-NOVFP: bl __aeabi_fcmple
692; CHECK-FP16: vcvtb.f16.f32
693; CHECK-LIBCALL: bl __aeabi_f2h
694define void @test_maximum(half* %p) #0 {
695  %a = load half, half* %p, align 2
696  %c = fcmp ugt half %a, 1.0
697  %r = select i1 %c, half %a, half 1.0
698  store half %r, half* %p
699  ret void
700}
701
702; CHECK-FP16-LABEL: test_copysign:
703; CHECK-FP16:         ldrh r2, [r0]
704; CHECK-FP16-NEXT:    vmov.i32 d16, #0x80000000
705; CHECK-FP16-NEXT:    ldrh r1, [r1]
706; CHECK-FP16-NEXT:    vmov s0, r2
707; CHECK-FP16-NEXT:    vmov s2, r1
708; CHECK-FP16-NEXT:    vcvtb.f32.f16 s0, s0
709; CHECK-FP16-NEXT:    vcvtb.f32.f16 s2, s2
710; CHECK-FP16-NEXT:    vbit d0, d1, d16
711; CHECK-FP16-NEXT:    vcvtb.f16.f32 s0, s0
712; CHECK-FP16-NEXT:    vmov r1, s0
713; CHECK-FP16-NEXT:    strh r1, [r0]
714; CHECK-FP16-NEXT:    bx lr
715
716; CHECK-LIBCALL-LABEL: test_copysign:
717; CHECK-LIBCALL-VFP:         .fnstart
718; CHECK-LIBCALL-VFP-NEXT:    .save {r4, r5, r11, lr}
719; CHECK-LIBCALL-VFP-NEXT:    push {r4, r5, r11, lr}
720; CHECK-LIBCALL-VFP-NEXT:    .vsave {d8, d9}
721; CHECK-LIBCALL-VFP-NEXT:    vpush {d8, d9}
722; CHECK-LIBCALL-VFP-NEXT:    mov r5, r0
723; CHECK-LIBCALL-VFP-NEXT:    ldrh r0, [r0]
724; CHECK-LIBCALL-VFP-NEXT:    mov r4, r1
725; CHECK-LIBCALL: bl __aeabi_h2f
726; CHECK-LIBCALL-VFP:         ldrh r1, [r4]
727; CHECK-LIBCALL-VFP-NEXT:    vmov s18, r0
728; CHECK-LIBCALL-VFP-NEXT:    vmov.i32 d8, #0x80000000
729; CHECK-LIBCALL-VFP-NEXT:    mov r0, r1
730; CHECK-LIBCALL: bl __aeabi_h2f
731; CHECK-LIBCALL-VFP:         vmov s0, r0
732; CHECK-LIBCALL-VFP-NEXT:    vbif d0, d9, d8
733; CHECK-LIBCALL-VFP-NEXT:    vmov r0, s0
734; CHECK-LIBCALL: bl __aeabi_f2h
735; CHECK-LIBCALL-VFP:         strh r0, [r5]
736; CHECK-LIBCALL-VFP-NEXT:    vpop {d8, d9}
737; CHECK-LIBCALL-VFP-NEXT:    pop {r4, r5, r11, pc}
738; CHECK-NOVFP: and
739; CHECK-NOVFP: bic
740; CHECK-NOVFP: orr
741define void @test_copysign(half* %p, half* %q) #0 {
742  %a = load half, half* %p, align 2
743  %b = load half, half* %q, align 2
744  %r = call half @llvm.copysign.f16(half %a, half %b)
745  store half %r, half* %p
746  ret void
747}
748
749; CHECK-FP16-LABEL: test_floor:
750; CHECK-FP16: vcvtb.f32.f16
751; CHECK-FP16: bl floorf
752; CHECK-FP16: vcvtb.f16.f32
753; CHECK-LIBCALL-LABEL: test_floor:
754; CHECK-LIBCALL: bl __aeabi_h2f
755; CHECK-LIBCALL: bl floorf
756; CHECK-LIBCALL: bl __aeabi_f2h
757define void @test_floor(half* %p) {
758  %a = load half, half* %p, align 2
759  %r = call half @llvm.floor.f16(half %a)
760  store half %r, half* %p
761  ret void
762}
763
764; CHECK-FP16-LABEL: test_ceil:
765; CHECK-FP16: vcvtb.f32.f16
766; CHECK-FP16: bl ceilf
767; CHECK-FP16: vcvtb.f16.f32
768; CHECK-LIBCALL-LABEL: test_ceil:
769; CHECK-LIBCALL: bl __aeabi_h2f
770; CHECK-LIBCALL: bl ceilf
771; CHECK-LIBCALL: bl __aeabi_f2h
772define void @test_ceil(half* %p) {
773  %a = load half, half* %p, align 2
774  %r = call half @llvm.ceil.f16(half %a)
775  store half %r, half* %p
776  ret void
777}
778
779; CHECK-FP16-LABEL: test_trunc:
780; CHECK-FP16: vcvtb.f32.f16
781; CHECK-FP16: bl truncf
782; CHECK-FP16: vcvtb.f16.f32
783; CHECK-LIBCALL-LABEL: test_trunc:
784; CHECK-LIBCALL: bl __aeabi_h2f
785; CHECK-LIBCALL: bl truncf
786; CHECK-LIBCALL: bl __aeabi_f2h
787define void @test_trunc(half* %p) {
788  %a = load half, half* %p, align 2
789  %r = call half @llvm.trunc.f16(half %a)
790  store half %r, half* %p
791  ret void
792}
793
794; CHECK-FP16-LABEL: test_rint:
795; CHECK-FP16: vcvtb.f32.f16
796; CHECK-FP16: bl rintf
797; CHECK-FP16: vcvtb.f16.f32
798; CHECK-LIBCALL-LABEL: test_rint:
799; CHECK-LIBCALL: bl __aeabi_h2f
800; CHECK-LIBCALL: bl rintf
801; CHECK-LIBCALL: bl __aeabi_f2h
802define void @test_rint(half* %p) {
803  %a = load half, half* %p, align 2
804  %r = call half @llvm.rint.f16(half %a)
805  store half %r, half* %p
806  ret void
807}
808
809; CHECK-FP16-LABEL: test_nearbyint:
810; CHECK-FP16: vcvtb.f32.f16
811; CHECK-FP16: bl nearbyintf
812; CHECK-FP16: vcvtb.f16.f32
813; CHECK-LIBCALL-LABEL: test_nearbyint:
814; CHECK-LIBCALL: bl __aeabi_h2f
815; CHECK-LIBCALL: bl nearbyintf
816; CHECK-LIBCALL: bl __aeabi_f2h
817define void @test_nearbyint(half* %p) {
818  %a = load half, half* %p, align 2
819  %r = call half @llvm.nearbyint.f16(half %a)
820  store half %r, half* %p
821  ret void
822}
823
824; CHECK-FP16-LABEL: test_round:
825; CHECK-FP16: vcvtb.f32.f16
826; CHECK-FP16: bl roundf
827; CHECK-FP16: vcvtb.f16.f32
828; CHECK-LIBCALL-LABEL: test_round:
829; CHECK-LIBCALL: bl __aeabi_h2f
830; CHECK-LIBCALL: bl roundf
831; CHECK-LIBCALL: bl __aeabi_f2h
832define void @test_round(half* %p) {
833  %a = load half, half* %p, align 2
834  %r = call half @llvm.round.f16(half %a)
835  store half %r, half* %p
836  ret void
837}
838
; llvm.fmuladd on three half operands.  Configurations with FP registers are
; expected to use vmla.f32 on the widened values; without FP registers the
; checks look for a call to __aeabi_fmul.
839; CHECK-FP16-LABEL: test_fmuladd:
840; CHECK-FP16: vcvtb.f32.f16
841; CHECK-FP16: vcvtb.f32.f16
842; CHECK-FP16: vcvtb.f32.f16
843; CHECK-FP16: vmla.f32
844; CHECK-FP16: vcvtb.f16.f32
845; CHECK-LIBCALL-LABEL: test_fmuladd:
846; CHECK-LIBCALL: bl __aeabi_h2f
847; CHECK-LIBCALL: bl __aeabi_h2f
848; CHECK-LIBCALL: bl __aeabi_h2f
849; CHECK-LIBCALL-VFP: vmla.f32
850; CHECK-NOVFP: bl __aeabi_fmul
851; CHECK-LIBCALL: bl __aeabi_f2h
852define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
853  %a = load half, half* %p, align 2
854  %b = load half, half* %q, align 2
855  %c = load half, half* %r, align 2
856  %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
857  store half %v, half* %p
858  ret void
859}
860
861; f16 vectors are not legal in the backend.  Vector elements are not assigned
862; to registers, but are stored on the stack instead.  Hence insertelement
863; and extractelement have these extra loads and stores.
864
865; CHECK-ALL-LABEL: test_insertelement:
866; CHECK-ALL: sub sp, sp, #8
867
868; CHECK-VFP:	and
869; CHECK-VFP:	mov
870; CHECK-VFP:	ldrd
871; CHECK-VFP:	orr
872; CHECK-VFP:	ldrh
873; CHECK-VFP:	stm
874; CHECK-VFP:	strh
875; CHECK-VFP:	ldm
876; CHECK-VFP:	stm
877
878; CHECK-NOVFP: ldrh
879; CHECK-NOVFP: ldrh
880; CHECK-NOVFP: ldrh
881; CHECK-NOVFP: ldrh
882; CHECK-NOVFP-DAG: strh
883; CHECK-NOVFP-DAG: strh
884; CHECK-NOVFP-DAG: mov
885; CHECK-NOVFP-DAG: ldrh
886; CHECK-NOVFP-DAG: orr
887; CHECK-NOVFP-DAG: strh
888; CHECK-NOVFP-DAG: strh
889; CHECK-NOVFP-DAG: strh
890; CHECK-NOVFP-DAG: ldrh
891; CHECK-NOVFP-DAG: ldrh
892; CHECK-NOVFP-DAG: ldrh
893; CHECK-NOVFP-DAG: strh
894; CHECK-NOVFP-DAG: strh
895; CHECK-NOVFP-DAG: strh
896; CHECK-NOVFP-DAG: strh
897
898; CHECK-ALL: add sp, sp, #8
899define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
900  %a = load half, half* %p, align 2
901  %b = load <4 x half>, <4 x half>* %q, align 8
902  %c = insertelement <4 x half> %b, half %a, i32 %i
903  store <4 x half> %c, <4 x half>* %q
904  ret void
905}
906
907; CHECK-ALL-LABEL: test_extractelement:
908; CHECK-VFP: push {{{.*}}, lr}
909; CHECK-VFP: sub sp, sp, #8
910; CHECK-VFP: ldrd
911; CHECK-VFP: mov
912; CHECK-VFP: orr
913; CHECK-VFP: ldrh
914; CHECK-VFP: strh
915; CHECK-VFP: add sp, sp, #8
916; CHECK-VFP: pop {{{.*}}, pc}
917; CHECK-NOVFP: ldrh
918; CHECK-NOVFP: strh
919; CHECK-NOVFP: ldrh
920; CHECK-NOVFP: strh
921; CHECK-NOVFP: ldrh
922; CHECK-NOVFP: strh
923; CHECK-NOVFP: ldrh
924; CHECK-NOVFP: strh
925; CHECK-NOVFP: ldrh
926define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
927  %a = load <4 x half>, <4 x half>* %q, align 8
928  %b = extractelement <4 x half> %a, i32 %i
929  store half %b, half* %p
930  ret void
931}
932
933; test struct operations
934
935%struct.dummy = type { i32, half }
936
937; CHECK-ALL-LABEL: test_insertvalue:
938; CHECK-ALL-DAG: ldr
939; CHECK-ALL-DAG: ldrh
940; CHECK-ALL-DAG: strh
941; CHECK-ALL-DAG: str
942define void @test_insertvalue(%struct.dummy* %p, half* %q) {
943  %a = load %struct.dummy, %struct.dummy* %p
944  %b = load half, half* %q
945  %c = insertvalue %struct.dummy %a, half %b, 1
946  store %struct.dummy %c, %struct.dummy* %p
947  ret void
948}
949
950; CHECK-ALL-LABEL: test_extractvalue:
951; CHECK-ALL: .fnstart
952; CHECK-ALL: ldrh
953; CHECK-ALL: strh
954define void @test_extractvalue(%struct.dummy* %p, half* %q) {
955  %a = load %struct.dummy, %struct.dummy* %p
956  %b = extractvalue %struct.dummy %a, 1
957  store half %b, half* %q
958  ret void
959}
960
; Returning a { i32, half } struct loaded from memory.  The conversion check
; uses the LIBCALL-VFP prefix, which the default-features run enables, so it
; is actually evaluated; the run without FP registers only expects the two
; member loads.
961; CHECK-ALL-LABEL: test_struct_return:
962; CHECK-LIBCALL-VFP: bl __aeabi_h2f
963; CHECK-NOVFP-DAG: ldr
964; CHECK-NOVFP-DAG: ldrh
965define %struct.dummy @test_struct_return(%struct.dummy* %p) {
966  %a = load %struct.dummy, %struct.dummy* %p
967  ret %struct.dummy %a
968}
969
970; CHECK-ALL-LABEL: test_struct_arg:
971; CHECK-ALL-NEXT: .fnstart
972; CHECK-NOVFP-NEXT: mov r0, r1
973; CHECK-ALL-NEXT: bx lr
974define half @test_struct_arg(%struct.dummy %p) {
975  %a = extractvalue %struct.dummy %p, 1
976  ret half %a
977}
978
; uitofp i32 -> half feeding an fadd with a half argument.  The label uses
; the ALL prefix, which every run enables, so the unordered checks below are
; scoped to this function's output.
979; CHECK-ALL-LABEL: test_uitofp_i32_fadd:
980; CHECK-VFP-DAG: vcvt.f32.u32
981; CHECK-NOVFP-DAG: bl __aeabi_ui2f
982
983; CHECK-FP16-DAG: vcvtb.f16.f32
984; CHECK-FP16-DAG: vcvtb.f32.f16
985; CHECK-LIBCALL-DAG: bl __aeabi_h2f
986; CHECK-LIBCALL-DAG: bl __aeabi_h2f
987
988; CHECK-VFP-DAG: vadd.f32
989; CHECK-NOVFP-DAG: bl __aeabi_fadd
990
991; CHECK-FP16-DAG: vcvtb.f16.f32
992; CHECK-LIBCALL-DAG: bl __aeabi_f2h
993define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
994  %c = uitofp i32 %a to half
995  %r = fadd half %b, %c
996  ret half %r
997}
998
; sitofp i32 -> half feeding an fadd with a half argument.  The label uses
; the ALL prefix, which every run enables, so the unordered checks below are
; scoped to this function's output.
999; CHECK-ALL-LABEL: test_sitofp_i32_fadd:
1000; CHECK-VFP-DAG: vcvt.f32.s32
1001; CHECK-NOVFP-DAG: bl __aeabi_i2f
1002
1003; CHECK-FP16-DAG: vcvtb.f16.f32
1004; CHECK-FP16-DAG: vcvtb.f32.f16
1005; CHECK-LIBCALL-DAG: bl __aeabi_h2f
1006; CHECK-LIBCALL-DAG: bl __aeabi_h2f
1007
1008; CHECK-VFP-DAG: vadd.f32
1009; CHECK-NOVFP-DAG: bl __aeabi_fadd
1010
1011; CHECK-FP16-DAG: vcvtb.f16.f32
1012; CHECK-LIBCALL-DAG: bl __aeabi_f2h
1013define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
1014  %c = sitofp i32 %a to half
1015  %r = fadd half %b, %c
1016  ret half %r
1017}
1018
1019attributes #0 = { nounwind }
1020