; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
; RUN: llc -asm-verbose=false < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
; RUN: llc -asm-verbose=false < %s -mattr=-fpregs | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL

; Code-generation checks for operations on the `half` type.  The three llc
; invocations above compile this file with +vfp3,+fp16, with the default
; attributes, and with -fpregs; each one enables its own set of check prefixes.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
target triple = "armv7---eabihf"

; Binary arithmetic: both operands are loaded from memory as half, the result
; is stored back through %p.  The checks expect a conversion of each operand
; to f32, an f32 operation (instruction or runtime call), and a conversion
; back to half.

; CHECK-ALL-LABEL: test_fadd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vadd.f32
; CHECK-NOVFP: bl __aeabi_fadd
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fadd(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fadd half %a, %b
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_fsub:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vsub.f32
; CHECK-NOVFP: bl __aeabi_fsub
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fsub(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fsub half %a, %b
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_fmul:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vmul.f32
; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmul(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fmul half %a, %b
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_fdiv:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vdiv.f32
; CHECK-NOVFP: bl __aeabi_fdiv
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fdiv(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fdiv half %a, %b
  store half %r, half* %p
  ret void
}

; frem has no instruction form in any configuration checked here; the libcall
; configurations expect a call to fmodf.

; CHECK-ALL-LABEL: test_frem:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmodf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_frem(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = frem half %a, %b
  store half %r, half* %p
  ret void
}

; A plain copy of a half value is expected to be a halfword load and store.

; CHECK-ALL-LABEL: test_load_store:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_load_store(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  store half %a, half* %q
  ret void
}

; Testing only successful compilation of function calls.  In ARM ABI, half
; args and returns are handled as f32.
declare half @test_callee(half %a, half %b) #0

; Arguments are forwarded unchanged, so only the call itself is expected.

; CHECK-ALL-LABEL: test_call:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call(half %a, half %b) #0 {
  %r = call half @test_callee(half %a, half %b)
  ret half %r
}

; The two arguments are swapped before the call: through s-registers when VFP
; is available, through core registers otherwise.

; CHECK-ALL-LABEL: test_call_flipped:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-VFP-NEXT: vmov.f32 s2, s0
; CHECK-VFP-NEXT: vmov.f32 s0, s1
; CHECK-VFP-NEXT: vmov.f32 s1, s2
; CHECK-NOVFP-NEXT: mov r2, r0
; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call_flipped(half %a, half %b) #0 {
  %r = call half @test_callee(half %b, half %a)
  ret half %r
}

; Same swap as above, but the call is expected to become a plain branch.

; CHECK-ALL-LABEL: test_tailcall_flipped:
; CHECK-ALL-NEXT: .fnstart
; CHECK-VFP-NEXT: vmov.f32 s2, s0
; CHECK-VFP-NEXT: vmov.f32 s0, s1
; CHECK-VFP-NEXT: vmov.f32 s1, s2
; CHECK-NOVFP-NEXT: mov r2, r0
; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: b test_callee
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %r = tail call half @test_callee(half %b, half %a)
  ret half %r
}

; Optimizer picks %p or %q based on %c and only loads that value
; No conversion is needed
; CHECK-ALL-LABEL: test_select:
; CHECK-ALL: cmp {{r[0-9]+}}, #0
; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}}
; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = select i1 %c, half %a, half %b
  store half %r, half* %p
  ret void
}

; Test only two variants of fcmp.  These get translated to f32 vcmp
; instructions anyway.
; CHECK-ALL-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-VFP-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-VFP-NEXT: movwne
; CHECK-NOVFP-NEXT: clz r0, r0
; CHECK-NOVFP-NEXT: lsr r0, r0, #5
define i1 @test_fcmp_une(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fcmp une half %a, %b
  ret i1 %r
}

; CHECK-ALL-LABEL: test_fcmp_ueq:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-LIBCALL: movw{{ne|eq}}
define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = fcmp ueq half %a, %b
  ret i1 %r
}

; A half comparison feeding a conditional branch; each successor performs one
; store, and the checks expect the two stores to be predicated.

; CHECK-ALL-LABEL: test_br_cc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcmp.f32
; CHECK-NOVFP: bl __aeabi_fcmplt
; CHECK-FP16: vmrs APSR_nzcv, fpscr
; CHECK-VFP: strmi
; CHECK-VFP: strpl
; CHECK-NOVFP: strne
; CHECK-NOVFP: streq
define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %c = fcmp uge half %a, %b
  br i1 %c, label %then, label %else
then:
  store i32 0, i32* %p1
  ret void
else:
  store i32 0, i32* %p2
  ret void
}

declare i1 @test_dummy(half* %p) #0

; A half value carried around a loop through a phi.  The loop-label pattern
; accepts every decimal digit, so block labels that contain a 0 (for example
; .LBB10_1) are matched as well.

; CHECK-ALL-LABEL: test_phi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: [[LOOP:.LBB[0-9_]+]]:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl test_dummy
; CHECK-FP16: bne [[LOOP]]
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-LIBCALL: [[LOOP:.LBB[0-9_]+]]:
; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-LIBCALL: bl test_dummy
; CHECK-LIBCALL: bne [[LOOP]]
; CHECK-LIBCALL-VFP: bl __aeabi_f2h
define void @test_phi(half* %p) #0 {
entry:
  %a = load half, half* %p
  br label %loop
loop:
  %r = phi half [%a, %entry], [%b, %loop]
  %b = load half, half* %p
  %c = call i1 @test_dummy(half* %p)
  br i1 %c, label %loop, label %return
return:
  store half %r, half* %p
  ret void
}

; Conversions between half and integers.  The 64-bit integer cases expect a
; runtime call in every configuration.

; CHECK-ALL-LABEL: test_fptosi_i32:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcvt.s32.f32
; CHECK-NOVFP: bl __aeabi_f2iz
define i32 @test_fptosi_i32(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i32
  ret i32 %r
}

; CHECK-ALL-LABEL: test_fptosi_i64:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-ALL: bl __aeabi_f2lz
define i64 @test_fptosi_i64(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; CHECK-ALL-LABEL: test_fptoui_i32:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcvt.u32.f32
; CHECK-NOVFP: bl __aeabi_f2uiz
define i32 @test_fptoui_i32(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i32
  ret i32 %r
}

; CHECK-ALL-LABEL: test_fptoui_i64:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-ALL: bl __aeabi_f2ulz
define i64 @test_fptoui_i64(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; CHECK-ALL-LABEL: test_sitofp_i32:
; CHECK-VFP: vcvt.f32.s32
; CHECK-NOVFP: bl __aeabi_i2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i32(i32 %a, half* %p) #0 {
  %r = sitofp i32 %a to half
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_uitofp_i32:
; CHECK-VFP: vcvt.f32.u32
; CHECK-NOVFP: bl __aeabi_ui2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i32(i32 %a, half* %p) #0 {
  %r = uitofp i32 %a to half
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_sitofp_i64:
; CHECK-ALL: bl __aeabi_l2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_uitofp_i64:
; CHECK-ALL: bl __aeabi_ul2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; Truncation to and extension from half.  Truncating a double is expected to
; be a runtime call even when the fp16 conversion instructions are enabled.

; CHECK-FP16-LABEL: test_fptrunc_float:
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fptrunc_float:
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fptrunc_float(float %f, half* %p) #0 {
  %a = fptrunc float %f to half
  store half %a, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fptrunc_double:
; CHECK-FP16: bl __aeabi_d2h
; CHECK-LIBCALL-LABEL: test_fptrunc_double:
; CHECK-LIBCALL: bl __aeabi_d2h
define void @test_fptrunc_double(double %d, half* %p) #0 {
  %a = fptrunc double %d to half
  store half %a, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fpextend_float:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_fpextend_float:
; CHECK-LIBCALL: bl __aeabi_h2f
define float @test_fpextend_float(half* %p) {
  %a = load half, half* %p, align 2
  %r = fpext half %a to float
  ret float %r
}

; CHECK-FP16-LABEL: test_fpextend_double:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_fpextend_double:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP: vcvt.f64.f32
; CHECK-NOVFP: bl __aeabi_f2d
define double @test_fpextend_double(half* %p) {
  %a = load half, half* %p, align 2
  %r = fpext half %a to double
  ret double %r
}

; Bitcasts between half and i16 are expected to be a bare halfword load or
; store with no conversion.

; CHECK-ALL-LABEL: test_bitcast_halftoi16:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: ldrh r0, [r0]
; CHECK-ALL-NEXT: bx lr
define i16 @test_bitcast_halftoi16(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = bitcast half %a to i16
  ret i16 %r
}

; CHECK-ALL-LABEL: test_bitcast_i16tohalf:
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: strh r0, [r1]
; CHECK-ALL-NEXT: bx lr
define void @test_bitcast_i16tohalf(i16 %a, half* %p) #0 {
  %r = bitcast i16 %a to half
  store half %r, half* %p
  ret void
}

; Declarations of the half-precision intrinsics declared for this file.
declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
declare half @llvm.log.f16(half %a) #0
declare half @llvm.log10.f16(half %a) #0
declare half @llvm.log2.f16(half %a) #0
declare half @llvm.fma.f16(half %a, half %b, half %c) #0
declare half @llvm.fabs.f16(half %a) #0
declare half @llvm.minnum.f16(half %a, half %b) #0
declare half @llvm.maxnum.f16(half %a, half %b) #0
declare half @llvm.copysign.f16(half %a, half %b) #0
declare half @llvm.floor.f16(half %a) #0
declare half @llvm.ceil.f16(half %a) #0
declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0

; CHECK-ALL-LABEL: test_sqrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vsqrt.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vsqrt.f32
; CHECK-NOVFP: bl sqrtf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sqrt(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.sqrt.f16(half %a)
  store half %r, half* %p
  ret void
}

; The math intrinsics below are expected to become calls to the
; single-precision library routine, wrapped in half/f32 conversions.

; CHECK-FP16-LABEL: test_fpowi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl __powisf2
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fpowi:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __powisf2
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fpowi(half* %p, i32 %b) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.powi.f16(half %a, i32 %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_sin:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl sinf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sin:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl sinf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sin(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.sin.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_cos:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl cosf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_cos:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl cosf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_cos(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.cos.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_pow:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl powf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_pow:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl powf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_pow(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.pow.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; pow with a constant exponent; the checks still expect a call to powf.

; CHECK-FP16-LABEL: test_cbrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl powf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_cbrt:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl powf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_cbrt(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.pow.f16(half %a, half 0x3FD5540000000000)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_exp:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl expf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl expf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_exp(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.exp.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_exp2:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl exp2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp2:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl exp2f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_exp2(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.exp2.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_log:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl logf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl logf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.log.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_log10:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl log10f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log10:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl log10f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log10(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.log10.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_log2:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl log2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log2:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl log2f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log2(half* %p) #0 {
  %a = load half, half* %p, align 2
  %r = call half @llvm.log2.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fma:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmaf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fma:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmaf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fma(half* %p, half* %q, half* %r) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %c = load half, half* %r, align 2
  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
  store half %v, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_fabs:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vabs.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fabs:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bic
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fabs(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.fabs.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_minnum:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fminf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_minnum:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fminf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_minnum(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.minnum.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_maxnum:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl fmaxf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_maxnum:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmaxf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_maxnum(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.maxnum.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; Minimum and maximum written as an fcmp followed by a select against the
; constant 1.0, rather than through an intrinsic.

; CHECK-ALL-LABEL: test_minimum:
; CHECK-FP16: vmov.f32 s0, #1.000000e+00
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vcmp.f32
; CHECK-VFP: vmrs
; CHECK-VFP: vmovlt.f32
; CHECK-NOVFP: bl __aeabi_fcmpge
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_minimum(half* %p) #0 {
  %a = load half, half* %p, align 2
  %c = fcmp ult half %a, 1.0
  %r = select i1 %c, half %a, half 1.0
  store half %r, half* %p
  ret void
}

; CHECK-ALL-LABEL: test_maximum:
; CHECK-FP16: vmov.f32 s0, #1.000000e+00
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
; CHECK-VFP: vcmp.f32
; CHECK-VFP: vmrs
; CHECK-VFP: vmovhi.f32
; CHECK-NOVFP: bl __aeabi_fcmple
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_maximum(half* %p) #0 {
  %a = load half, half* %p, align 2
  %c = fcmp ugt half %a, 1.0
  %r = select i1 %c, half %a, half 1.0
  store half %r, half* %p
  ret void
}

; copysign is checked instruction by instruction for the two VFP
; configurations, and only loosely (and / bic / orr) without FP registers.

; CHECK-FP16-LABEL: test_copysign:
; CHECK-FP16: ldrh r2, [r0]
; CHECK-FP16-NEXT: vmov.i32 d16, #0x80000000
; CHECK-FP16-NEXT: ldrh r1, [r1]
; CHECK-FP16-NEXT: vmov s0, r2
; CHECK-FP16-NEXT: vmov s2, r1
; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FP16-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-FP16-NEXT: vbit d0, d1, d16
; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-FP16-NEXT: vmov r1, s0
; CHECK-FP16-NEXT: strh r1, [r0]
; CHECK-FP16-NEXT: bx lr

; CHECK-LIBCALL-LABEL: test_copysign:
; CHECK-LIBCALL-VFP: .fnstart
; CHECK-LIBCALL-VFP-NEXT: .save {r4, r5, r11, lr}
; CHECK-LIBCALL-VFP-NEXT: push {r4, r5, r11, lr}
; CHECK-LIBCALL-VFP-NEXT: .vsave {d8, d9}
; CHECK-LIBCALL-VFP-NEXT: vpush {d8, d9}
; CHECK-LIBCALL-VFP-NEXT: mov r5, r0
; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r0]
; CHECK-LIBCALL-VFP-NEXT: mov r4, r1
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: ldrh r1, [r4]
; CHECK-LIBCALL-VFP-NEXT: vmov s18, r0
; CHECK-LIBCALL-VFP-NEXT: vmov.i32 d8, #0x80000000
; CHECK-LIBCALL-VFP-NEXT: mov r0, r1
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmov s0, r0
; CHECK-LIBCALL-VFP-NEXT: vbif d0, d9, d8
; CHECK-LIBCALL-VFP-NEXT: vmov r0, s0
; CHECK-LIBCALL: bl __aeabi_f2h
; CHECK-LIBCALL-VFP: strh r0, [r5]
; CHECK-LIBCALL-VFP-NEXT: vpop {d8, d9}
; CHECK-LIBCALL-VFP-NEXT: pop {r4, r5, r11, pc}
; CHECK-NOVFP: and
; CHECK-NOVFP: bic
; CHECK-NOVFP: orr
define void @test_copysign(half* %p, half* %q) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %r = call half @llvm.copysign.f16(half %a, half %b)
  store half %r, half* %p
  ret void
}

; Rounding intrinsics; each is expected to call its single-precision library
; routine.

; CHECK-FP16-LABEL: test_floor:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl floorf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_floor:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl floorf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_floor(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.floor.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_ceil:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl ceilf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_ceil:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl ceilf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_ceil(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.ceil.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_trunc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl truncf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_trunc:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl truncf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_trunc(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.trunc.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_rint:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl rintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_rint:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl rintf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_rint(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.rint.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_nearbyint:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl nearbyintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_nearbyint:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl nearbyintf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_nearbyint(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.nearbyint.f16(half %a)
  store half %r, half* %p
  ret void
}

; CHECK-FP16-LABEL: test_round:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl roundf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_round:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl roundf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_round(half* %p) {
  %a = load half, half* %p, align 2
  %r = call half @llvm.round.f16(half %a)
  store half %r, half* %p
  ret void
}

; fmuladd is expected to become vmla.f32 when VFP is available and a multiply
; runtime call otherwise.

; CHECK-FP16-LABEL: test_fmuladd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vmla.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fmuladd:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL-VFP: vmla.f32
; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
  %a = load half, half* %p, align 2
  %b = load half, half* %q, align 2
  %c = load half, half* %r, align 2
  %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
  store half %v, half* %p
  ret void
}

; f16 vectors are not legal in the backend.  Vector elements are not assigned
; to registers, but are stored on the stack instead.  Hence insertelement
; and extractelement have these extra loads and stores.
; Inserting a half into a <4 x half> at a variable index.  The checks expect
; 8 bytes of stack to be reserved and released around the element access.

; CHECK-ALL-LABEL: test_insertelement:
; CHECK-ALL: sub sp, sp, #8

; CHECK-VFP: and
; CHECK-VFP: mov
; CHECK-VFP: ldrd
; CHECK-VFP: orr
; CHECK-VFP: ldrh
; CHECK-VFP: stm
; CHECK-VFP: strh
; CHECK-VFP: ldm
; CHECK-VFP: stm

; CHECK-NOVFP: ldrh
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: ldrh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: mov
; CHECK-NOVFP-DAG: ldrh
; CHECK-NOVFP-DAG: orr
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: ldrh
; CHECK-NOVFP-DAG: ldrh
; CHECK-NOVFP-DAG: ldrh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: strh
; CHECK-NOVFP-DAG: strh

; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
  %a = load half, half* %p, align 2
  %b = load <4 x half>, <4 x half>* %q, align 8
  %c = insertelement <4 x half> %b, half %a, i32 %i
  store <4 x half> %c, <4 x half>* %q
  ret void
}

; Extracting a half from a <4 x half> at a variable index.

; CHECK-ALL-LABEL: test_extractelement:
; CHECK-VFP: push {{{.*}}, lr}
; CHECK-VFP: sub sp, sp, #8
; CHECK-VFP: ldrd
; CHECK-VFP: mov
; CHECK-VFP: orr
; CHECK-VFP: ldrh
; CHECK-VFP: strh
; CHECK-VFP: add sp, sp, #8
; CHECK-VFP: pop {{{.*}}, pc}
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: strh
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: strh
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: strh
; CHECK-NOVFP: ldrh
; CHECK-NOVFP: strh
; CHECK-NOVFP: ldrh
define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
  %a = load <4 x half>, <4 x half>* %q, align 8
  %b = extractelement <4 x half> %a, i32 %i
  store half %b, half* %p
  ret void
}

; Struct operations: an aggregate with an i32 field followed by a half field.

%struct.dummy = type { i32, half }

; CHECK-ALL-LABEL: test_insertvalue:
; CHECK-ALL-DAG: ldr
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: str
define void @test_insertvalue(%struct.dummy* %p, half* %q) {
  %a = load %struct.dummy, %struct.dummy* %p
  %b = load half, half* %q
  %c = insertvalue %struct.dummy %a, half %b, 1
  store %struct.dummy %c, %struct.dummy* %p
  ret void
}

; Reading the half field of a loaded struct and storing it through %q.

; CHECK-ALL-LABEL: test_extractvalue:
; CHECK-ALL: .fnstart
; CHECK-ALL: ldrh
; CHECK-ALL: strh
define void @test_extractvalue(%struct.dummy* %p, half* %q) {
  %a = load %struct.dummy, %struct.dummy* %p
  %b = extractvalue %struct.dummy %a, 1
  store half %b, half* %q
  ret void
}

; Returning the struct by value.  The conversion-call check below uses the
; prefix that the default-attribute llc invocation enables, so it is
; evaluated for that configuration.

; CHECK-ALL-LABEL: test_struct_return:
; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-NOVFP-DAG: ldr
; CHECK-NOVFP-DAG: ldrh
define %struct.dummy @test_struct_return(%struct.dummy* %p) {
  %a = load %struct.dummy, %struct.dummy* %p
  ret %struct.dummy %a
}

; Returning the half field of a struct passed by value.

; CHECK-ALL-LABEL: test_struct_arg:
; CHECK-ALL-NEXT: .fnstart
; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-ALL-NEXT: bx lr
define half @test_struct_arg(%struct.dummy %p) {
  %a = extractvalue %struct.dummy %p, 1
  ret half %a
}

; An integer-to-half conversion feeding an fadd.  Each function is anchored by
; a label directive using a prefix that every llc invocation enables, so the
; order-independent checks that follow are confined to that function.

; CHECK-ALL-LABEL: test_uitofp_i32_fadd:
; CHECK-VFP-DAG: vcvt.f32.u32
; CHECK-NOVFP-DAG: bl __aeabi_ui2f

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-FP16-DAG: vcvtb.f32.f16
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
; CHECK-LIBCALL-DAG: bl __aeabi_h2f

; CHECK-VFP-DAG: vadd.f32
; CHECK-NOVFP-DAG: bl __aeabi_fadd

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-LIBCALL-DAG: bl __aeabi_f2h
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = uitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

; CHECK-ALL-LABEL: test_sitofp_i32_fadd:
; CHECK-VFP-DAG: vcvt.f32.s32
; CHECK-NOVFP-DAG: bl __aeabi_i2f

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-FP16-DAG: vcvtb.f32.f16
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
; CHECK-LIBCALL-DAG: bl __aeabi_h2f

; CHECK-VFP-DAG: vadd.f32
; CHECK-NOVFP-DAG: bl __aeabi_fadd

; CHECK-FP16-DAG: vcvtb.f16.f32
; CHECK-LIBCALL-DAG: bl __aeabi_f2h
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = sitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

attributes #0 = { nounwind }