; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

; TODO: we can't pass half-precision arguments as "half" types yet. We do
; that for the time being by passing "float %f.coerce" and the necessary
; bitconverts/truncates. But when we can pass half types, we do want to use
; and test that here.

define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL: RetValBug:
; CHECK-HARDFP-FULLFP16: {{.*}} lr
}

; 2. VADD
; Scalar half-precision fadd: lowered to a libcall, a widened f32 add, or a
; native vadd.f16 depending on the FP ABI / FP16 feature configuration.
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fadd
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vadd.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vadd.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vadd.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vadd.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vadd.f16 s0, s0, s1
}

; 3. VCMP
; Half-precision compare of two variables.
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL: VCMP1:

; CHECK-SOFT: bl __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT: vmov.f16 s{{.}}, r1
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s1
}

; Check VCMPZH: compare against zero should use the #0 immediate form.
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL: VCMP2:

; CHECK-SOFT: bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

; 4. VCMPE
; Ordered compares (signalling form) against zero and a variable.
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE1:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmp.f32 s0, #0
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s0, #0
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, #0
}

define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL: VCMPE2:

; CHECK-SOFT: bl __aeabi_fcmplt
; CHECK-SOFTFP-FP16: vcmp.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL: VCMPBRCC:

; CHECK-SOFT: bl __aeabi_fcmp{{gt|le}}
; CHECK-SOFT: cmp r0, #{{0|1}}

; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16: vcmp.f32 [[S2]], s0
; CHECK-SOFTFP-FP16: vmrs APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16: vcmp.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16: vmrs APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptosi:

; CHECK-HARDFP-FULLFP16: vmov.f16 s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.s32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-LABEL: fptoui:

; CHECK-HARDFP-FULLFP16: vcvt.u32.f16 s0, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmov r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: UintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.u32 s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL: SintToH:

; CHECK-HARDFP-FULLFP16: vmov s0, r0
; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
}

define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: f2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
}

define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL: h2f:
; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
}

define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL: h2d:
; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
}

define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL: d2h:
; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7. VCVTA
; 8. VCVTM
; 9. VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Div:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fdiv
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vdiv.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vdiv.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vdiv.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vdiv.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vdiv.f16 s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMA:
; CHECK-HARDFP-FULLFP16-FAST: vfma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFMS:
; CHECK-HARDFP-FULLFP16-FAST: vfms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMA:
; CHECK-HARDFP-FULLFP16-FAST: vfnma.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VFNMS:
; CHECK-HARDFP-FULLFP16-FAST: vfnms.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT: vmov.f32 s0, s2
}

; 17. VMAXNM
; 18. VMINNM
; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll

; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLA:
; CHECK-HARDFP-FULLFP16: vmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VMLS:
; CHECK-HARDFP-FULLFP16: vmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT: vmov.f32 s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: movi:
; CHECK-HARDFP-FULLFP16: vmov.f16 s0, #-2.000000e+00
}

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Mul:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fmul
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vmul.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vmul.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vmul.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vmul.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vmul.f16 s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL: Neg:
; CHECK-HARDFP-FULLFP16: vneg.f16 s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLA:
; CHECK-HARDFP-FULLFP16: vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL: VNMLS:
; CHECK-HARDFP-FULLFP16: vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16: vmov.f32 s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: NMul:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; 35. VSELEQ
define half @select_cc1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz oeq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
}

; FIXME: more tests need to be added for VSELGE and VSELGT.
; That is, more combinations of immediate operands that can or can't
; be encoded as an FP16 immediate need to be added here.
;
; 36. VSELGE
define half @select_cc_ge1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz oge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ge
; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge2(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ole half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge2:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it ls
; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge3(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ugt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge3:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it hi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
}

define half @select_cc_ge4(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ult half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_ge4:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it lt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
}

; 37. VSELGT
define half @select_cc_gt1(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ogt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt1:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it gt
; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt2(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz uge half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt2:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it pl
; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt3(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz ule half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt3:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it le
; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
}

define half @select_cc_gt4(half* %a0) {
  %1 = load half, half* %a0
  %2 = fcmp nsz olt half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002
  ret half %3

; CHECK-LABEL: select_cc_gt4:

; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s6
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32-NEXT: it mi
; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
}

; 38. VSELVS
; VSELVS
; An unordered-equal (ueq) compare needs two conditions (EQ or VS), so with
; VSEL-capable FP the select is expected to lower to a vseleq followed by a
; vselvs; non-VSEL paths use two predicated VMOVs (eq then vs) instead.
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: select_cc4:

; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-A32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT: vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32: it vs
; CHECK-SOFTFP-FP16-T32-NEXT: vmovvs.f32 [[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT: vcvtb.f16.f32 s0, [[S4]]
}

; 40. VSUB
; Half-precision subtraction with the usual i16<->half coercion dance (see
; the TODO at the top of the file). Soft float uses the __aeabi_* libcalls;
; conversion-only FP16 promotes to f32 around a vsub.f32; FullFP16 emits a
; single native vsub.f16.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Sub:

; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_h2f
; CHECK-SOFT: bl __aeabi_fsub
; CHECK-SOFT: bl __aeabi_f2h

; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: bl __aeabi_h2f
; CHECK-SOFTFP-VFP3: vsub.f32
; CHECK-SOFTFP-VFP3: bl __aeabi_f2h

; CHECK-SOFTFP-FP16-DAG: vmov [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16-DAG: vmov [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16-DAG: vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16: vmov r0, s0

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vsub.f16 [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT: vmov.f16 r0, s0

; CHECK-HARDFP-VFP3: vmov r{{.}}, s0
; CHECK-HARDFP-VFP3: vmov{{.*}}, s1
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: bl __aeabi_h2f
; CHECK-HARDFP-VFP3: vsub.f32
; CHECK-HARDFP-VFP3: bl __aeabi_f2h
; CHECK-HARDFP-VFP3: vmov s0, r0

; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16: vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16: vsub.f32 [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16: vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16: vsub.f16 s0, s0, s1
}

; Check for VSTRH with a FCONSTH; this checks that addressing mode
; AddrMode5FP16 is supported. The volatile store/load of the FP16 constant
; forces the vstr.16/vldr.16 [sp, #imm] forms to be emitted.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL: ThumbAddrMode5FP16:

; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16: vstr.16 [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vmov.f16 [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16: vldr.16 [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16: vadd.f16 s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack: the FP16 value
; live across the call to fn2 must be spilled with vstr.16 and reloaded with
; vldr.16 (only checked for the -O0/fast-isel runs).
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD: vstr.16 s0, [sp, #{{.}}] @ 2-byte Spill
; CHECK-SPILL-RELOAD: bl fn2
; CHECK-SPILL-RELOAD-NEXT: vldr.16 s0, [sp, #{{.}}] @ 2-byte Reload
}

declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)