; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
; RUN:          -verify-machineinstrs \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
; ## FP16 is not supported by hardware.
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN:          -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Returning the half constant 1.0 is expected to materialise 0x3C00 with a
; 16-bit move and store it to the return slot.
; CHECK-LABEL: test_ret_const(
; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00;
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_ret_const() #0 {
  ret half 1.0
}

;;; Basic arithmetic.
;;; With native f16 math the operation is expected as a single .f16
;;; instruction; in the no-f16 configurations the operands are extended to
;;; f32, the operation is done in f32, and the result is rounded back to f16.

; CHECK-LABEL: test_fadd(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1];
; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd(half %a, half %b) #0 {
  %r = fadd half %a, %b
  ret half %r
}

; A single-element vector of half is expected to lower exactly like the
; scalar case above.
; CHECK-LABEL: test_fadd_v1f16(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
  %r = fadd <1 x half> %a, %b
  ret <1 x half> %r
}

; Check that we can lower fadd with immediate arguments.
; CHECK-LABEL: test_fadd_imm_0(
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd_imm_0(half %b) #0 {
  %r = fadd half 1.0, %b
  ret half %r
}

; CHECK-LABEL: test_fadd_imm_1(
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
; CHECK-F16-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00;
; CHECK-F16-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fadd_imm_1(half %a) #0 {
  %r = fadd half %a, 1.0
  ret half %r
}

; CHECK-LABEL: test_fsub(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1];
; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fsub(half %a, half %b) #0 {
  %r = fsub half %a, %b
  ret half %r
}

; Negation is written as (0.0 - a); the expected code materialises the zero
; and subtracts from it.
; CHECK-LABEL: test_fneg(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0];
; CHECK-F16-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000
; CHECK-F16-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fneg(half %a) #0 {
  %r = fsub half 0.0, %a
  ret half %r
}

; CHECK-LABEL: test_fmul(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1];
; CHECK-F16-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fmul(half %a, half %b) #0 {
  %r = fmul half %a, %b
  ret half %r
}

; Division is expected to go through f32 in every configuration.
; CHECK-LABEL: test_fdiv(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1];
; CHECK-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]];
; CHECK-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]];
; CHECK-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_fdiv(half %a, half %b) #0 {
  %r = fdiv half %a, %b
  ret half %r
}

; Remainder is expected as an f32 sequence: divide, round toward minus
; infinity (cvt.rmi), multiply back and subtract.
; CHECK-LABEL: test_frem(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1];
; CHECK-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_frem(half %a, half %b) #0 {
  %r = frem half %a, %b
  ret half %r
}

;;; Loads and stores.

; CHECK-LABEL: test_store(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0];
; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1];
; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]];
; CHECK-NEXT: ret;
define void @test_store(half %a, half* %b) #0 {
  store half %a, half* %b
  ret void
}

; CHECK-LABEL: test_load(
; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0];
; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_load(half* %a) #0 {
  %r = load half, half* %a
  ret half %r
}

; A load/store pair with 1-byte alignment is expected to be split into two
; byte-sized accesses at offsets 0 and 1.
; CHECK-LABEL: .visible .func test_halfp0a1(
; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
; CHECK: ret
define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
  %1 = load half, half * %from , align 1
  store half %1, half * %to , align 1
  ret void
}

;;; Calls.
;;; Half arguments and the half return value are expected to be passed via
;;; .b32 param slots that are accessed with 16-bit stores/loads.

declare half @test_callee(half %a, half %b) #0

; CHECK-LABEL: test_call(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1];
; CHECK: {
; CHECK-DAG: .param .b32 param0;
; CHECK-DAG: .param .b32 param1;
; CHECK-DAG: st.param.b16 [param0+0], [[A]];
; CHECK-DAG: st.param.b16 [param1+0], [[B]];
; CHECK-DAG: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_call(half %a, half %b) #0 {
  %r = call half @test_callee(half %a, half %b)
  ret half %r
}

; Same call with the two arguments swapped.
; CHECK-LABEL: test_call_flipped(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .b32 param0;
; CHECK-DAG: .param .b32 param1;
; CHECK-DAG: st.param.b16 [param0+0], [[B]];
; CHECK-DAG: st.param.b16 [param1+0], [[A]];
; CHECK-DAG: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_call_flipped(half %a, half %b) #0 {
  %r = call half @test_callee(half %b, half %a)
  ret half %r
}

; A 'tail call' is expected to produce the same ordinary call sequence.
; CHECK-LABEL: test_tailcall_flipped(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
; CHECK: {
; CHECK-DAG: .param .b32 param0;
; CHECK-DAG: .param .b32 param1;
; CHECK-DAG: st.param.b16 [param0+0], [[B]];
; CHECK-DAG: st.param.b16 [param1+0], [[A]];
; CHECK-DAG: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_callee,
; CHECK: );
; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %r = tail call half @test_callee(half %b, half %a)
  ret half %r
}

;;; Selects.

; CHECK-LABEL: test_select(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1];
; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
  %r = select i1 %c, half %a, half %b
  ret half %r
}

; Select of halves on a half comparison: the compare is native with f16 math
; and done in f32 otherwise; the select itself is a 16-bit selp either way.
; CHECK-LABEL: test_select_cc(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1];
; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2];
; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3];
; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
  %cc = fcmp une half %c, %d
  %r = select i1 %cc, half %a, half %b
  ret half %r
}

; Select of floats on a half comparison.
; CHECK-LABEL: test_select_cc_f32_f16(
; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]]
; CHECK-NEXT: selp.f32 [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.f32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
  %cc = fcmp une half %c, %d
  %r = select i1 %cc, float %a, float %b
  ret float %r
}

; Select of halves on a float comparison.
; CHECK-LABEL: test_select_cc_f16_f32(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
; CHECK-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
  %cc = fcmp une float %c, %d
  %r = select i1 %cc, half %a, half %b
  ret half %r
}

;;; Comparisons, one test per fcmp predicate.
;;; With native f16 math a setp.<cc>.f16 is expected; otherwise both operands
;;; are extended to f32 and compared there. The i1 result is materialised
;;; with selp.u32 and returned through a b32 slot.

; CHECK-LABEL: test_fcmp_une(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1];
; CHECK-F16: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_une(half %a, half %b) #0 {
  %r = fcmp une half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ueq(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
; CHECK-F16: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ueq(half %a, half %b) #0 {
  %r = fcmp ueq half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ugt(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
; CHECK-F16: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ugt(half %a, half %b) #0 {
  %r = fcmp ugt half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_uge(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
; CHECK-F16: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_uge(half %a, half %b) #0 {
  %r = fcmp uge half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ult(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
; CHECK-F16: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ult(half %a, half %b) #0 {
  %r = fcmp ult half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ule(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
; CHECK-F16: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ule(half %a, half %b) #0 {
  %r = fcmp ule half %a, %b
  ret i1 %r
}


; CHECK-LABEL: test_fcmp_uno(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
; CHECK-F16: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_uno(half %a, half %b) #0 {
  %r = fcmp uno half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_one(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1];
; CHECK-F16: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_one(half %a, half %b) #0 {
  %r = fcmp one half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_oeq(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
; CHECK-F16: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_oeq(half %a, half %b) #0 {
  %r = fcmp oeq half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ogt(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
; CHECK-F16: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ogt(half %a, half %b) #0 {
  %r = fcmp ogt half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_oge(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
; CHECK-F16: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_oge(half %a, half %b) #0 {
  %r = fcmp oge half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_olt(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_olt(half %a, half %b) #0 {
  %r = fcmp olt half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ole(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ole(half %a, half %b) #0 {
  %r = fcmp ole half %a, %b
  ret i1 %r
}

; CHECK-LABEL: test_fcmp_ord(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
; CHECK-F16: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32 [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ord(half %a, half %b) #0 {
  %r = fcmp ord half %a, %b
  ret i1 %r
}

; Conditional branch on a half comparison. The IR branches to %then on
; 'uge'; the expected code tests the opposite condition (setp.lt) and uses it
; to branch over the first store to the second one.
; CHECK-LABEL: test_br_cc(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1];
; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2];
; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3];
; CHECK-F16: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: @[[PRED]] bra [[LABEL:LBB.*]];
; CHECK: st.u32 [%[[C]]],
; CHECK: [[LABEL]]:
; CHECK: st.u32 [%[[D]]],
; CHECK: ret;
define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
  %c = fcmp uge half %a, %b
  br i1 %c, label %then, label %else
then:
  store i32 0, i32* %p1
  ret void
else:
  store i32 0, i32* %p2
  ret void
}

; CHECK-LABEL: test_phi(
; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]];
; CHECK: [[LOOP:LBB[0-9_]+]]:
; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0+0], %[[P1]];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_dummy
; CHECK: }
; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: @[[PRED]] bra [[LOOP]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_phi(half* %p1) #0 {
entry:
  %a = load half, half* %p1
  br label %loop
loop:
  %r = phi half [%a, %entry], [%b, %loop]
  %b = load half, half* %p1
  %c = call i1 @test_dummy(half* %p1)
  br i1 %c, label %loop, label %return
return:
  ret half %r
}
declare i1 @test_dummy(half* %p1) #0

;;; Half -> integer conversions: a single cvt.rzi straight from f16 is
;;; expected in every configuration.

; CHECK-LABEL: test_fptosi_i32(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define i32 @test_fptosi_i32(half %a) #0 {
  %r = fptosi half %a to i32
  ret i32 %r
}

; CHECK-LABEL: test_fptosi_i64(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK: ret;
define i64 @test_fptosi_i64(half %a) #0 {
  %r = fptosi half %a to i64
  ret i64 %r
}

; CHECK-LABEL: test_fptoui_i32(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define i32 @test_fptoui_i32(half %a) #0 {
  %r = fptoui half %a to i32
  ret i32 %r
}

; CHECK-LABEL: test_fptoui_i64(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK: ret;
define i64 @test_fptoui_i64(half %a) #0 {
  %r = fptoui half %a to i64
  ret i64 %r
}

;;; Integer -> half conversions: a single cvt.rn straight to f16 is expected
;;; in every configuration.

; CHECK-LABEL: test_uitofp_i32(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_uitofp_i32(i32 %a) #0 {
  %r = uitofp i32 %a to half
  ret half %r
}

; CHECK-LABEL: test_uitofp_i64(
; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_uitofp_i64(i64 %a) #0 {
  %r = uitofp i64 %a to half
  ret half %r
}

; CHECK-LABEL: test_sitofp_i32(
; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sitofp_i32(i32 %a) #0 {
  %r = sitofp i32 %a to half
  ret half %r
}

; CHECK-LABEL: test_sitofp_i64(
; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sitofp_i64(i64 %a) #0 {
  %r = sitofp i64 %a to half
  ret half %r
}

; Integer -> half conversion feeding an fadd: the conversion result is used
; directly by add.rn.f16 with f16 math, and is extended to f32 for the add in
; the no-f16 configurations.
; CHECK-LABEL: test_uitofp_i32_fadd(
; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = uitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

; Signed counterpart of the test above; the same sequence is expected, with
; the conversion being cvt.rn.f16.s32.
; CHECK-LABEL: test_sitofp_i32_fadd(
; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
; CHECK-F16: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = sitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}

;;; Floating-point truncation and extension to and from half.

; CHECK-LABEL: test_fptrunc_float(
; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fptrunc_float(float %a) #0 {
  %r = fptrunc float %a to half
  ret half %r
}

; CHECK-LABEL: test_fptrunc_double(
; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fptrunc_double(double %a) #0 {
  %r = fptrunc double %a to half
  ret half %r
}

; CHECK-LABEL: test_fpext_float(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0];
; CHECK: cvt.f32.f16 [[R:%f[0-9]+]], [[A]];
; CHECK: st.param.f32 [func_retval0+0], [[R]];
; CHECK: ret;
define float @test_fpext_float(half %a) #0 {
  %r = fpext half %a to float
  ret float %r
}

; CHECK-LABEL: test_fpext_double(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0];
; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]];
; CHECK: st.param.f64 [func_retval0+0], [[R]];
; CHECK: ret;
define double @test_fpext_double(half %a) #0 {
  %r = fpext half %a to double
  ret double %r
}


; Bitcasts between half and i16 are expected as a 16-bit register move
; between the half and the integer register class. The i16 return value is
; widened to 32 bits for the return slot.
; CHECK-LABEL: test_bitcast_halftoi16(
; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]]
; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]]
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define i16 @test_bitcast_halftoi16(half %a) #0 {
  %r = bitcast half %a to i16
  ret i16 %r
}

; CHECK-LABEL: test_bitcast_i16tohalf(
; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]]
; CHECK: st.param.b16 [func_retval0+0], [[AH]];
; CHECK: ret;
define half @test_bitcast_i16tohalf(i16 %a) #0 {
  %r = bitcast i16 %a to half
  ret half %r
}


;;; Intrinsics exercised (or intended to be exercised) by the tests below.
declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
declare half @llvm.log.f16(half %a) #0
declare half @llvm.log10.f16(half %a) #0
declare half @llvm.log2.f16(half %a) #0
declare half @llvm.fma.f16(half %a, half %b, half %c) #0
declare half @llvm.fabs.f16(half %a) #0
declare half @llvm.minnum.f16(half %a, half %b) #0
declare half @llvm.maxnum.f16(half %a, half %b) #0
declare half @llvm.copysign.f16(half %a, half %b) #0
declare half @llvm.floor.f16(half %a) #0
declare half @llvm.ceil.f16(half %a) #0
declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0

; CHECK-LABEL: test_sqrt(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0];
; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
; Square root of a half via the llvm.sqrt.f16 intrinsic.
define half @test_sqrt(half %a) #0 {
  %root = call half @llvm.sqrt.f16(half %a)
  ret half %root
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_powi(
;define half @test_powi(half %a, i32 %b) #0 {
;  %r = call half @llvm.powi.f16(half %a, i32 %b)
;  ret half %r
;}

; Sine of a half: expected to be extended to f32, computed with
; sin.approx.f32 and rounded back to f16.
; NOTE(review): attribute group #1 is defined outside this view; presumably
; it is what permits the approximate instruction -- confirm.
; CHECK-LABEL: test_sin(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0];
; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_sin(half %a) #0 #1 {
  %sine = call half @llvm.sin.f16(half %a)
  ret half %sine
}

; Cosine of a half: same shape as the sine test, using cos.approx.f32.
; CHECK-LABEL: test_cos(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0];
; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_cos(half %a) #0 #1 {
  %cosine = call half @llvm.cos.f16(half %a)
  ret half %cosine
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_pow(
;define half @test_pow(half %a, half %b) #0 {
;  %r = call half @llvm.pow.f16(half %a, half %b)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp(
;define half @test_exp(half %a) #0 {
;  %r = call half @llvm.exp.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp2(
;define half @test_exp2(half %a) #0 {
;  %r = call half @llvm.exp2.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log(
;define half @test_log(half %a) #0 {
;  %r = call half @llvm.log.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log10(
;define half @test_log10(half %a) #0 {
;  %r = call half @llvm.log10.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log2(
;define half @test_log2(half %a) #0 {
;  %r = call half @llvm.log2.f16(half %a)
;  ret half %r
;}

; fma: the F16 run expects a single fma.rn.f16; the NOF16 runs expect all
; three operands widened to f32, fma.rn.f32, and the result narrowed back.
; The final ret expectation includes the trailing ';' like every other test
; in this file.
; CHECK-LABEL: test_fma(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1];
; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2];
; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fma(half %a, half %b, half %c) #0 {
  %r = call half @llvm.fma.f16(half %a, half %b, half %c)
  ret half %r
}

; fabs is expected to go through f32 in every run configuration:
; cvt.f32.f16, abs.f32, cvt.rn.f16.f32.
; CHECK-LABEL: test_fabs(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0];
; CHECK: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK: abs.f32 [[RF:%f[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fabs(half %a) #0 {
  %r = call half @llvm.fabs.f16(half %a)
  ret half %r
}

; minnum: both operands are widened to f32 (in either order), combined with
; min.f32, and the result is narrowed back to f16.
; CHECK-LABEL: test_minnum(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1];
; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_minnum(half %a, half %b) #0 {
  %r = call half @llvm.minnum.f16(half %a, half %b)
  ret half %r
}

; maxnum: both operands are widened to f32 (in either order), combined with
; max.f32, and the result is narrowed back to f16.
; CHECK-LABEL: test_maxnum(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1];
; CHECK-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_maxnum(half %a, half %b) #0 {
  %r = call half @llvm.maxnum.f16(half %a, half %b)
  ret half %r
}

; copysign is expected to be done with 16-bit integer ops:
; 32767 (0x7FFF) keeps the magnitude bits of %a, -32768 (0x8000) keeps the
; sign bit of %b, and the two are OR-ed together.
; CHECK-LABEL: test_copysign(
; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0];
; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1];
; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_copysign(half %a, half %b) #0 {
  %r = call half @llvm.copysign.f16(half %a, half %b)
  ret half %r
}

; copysign whose sign operand is an fptrunc'd float: the expected code takes
; the sign directly from the 32-bit pattern (mask -2147483648, i.e. bit 31),
; shifts it right by 16 into bit 15 and truncates to 16 bits; no f32->f16
; conversion instruction is expected.
; CHECK-LABEL: test_copysign_f32(
; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648;
; CHECK-DAG: shr.u32 [[BX1:%r[0-9]+]], [[BX0]], 16;
; CHECK-DAG: cvt.u16.u32 [[BX2:%rs[0-9]+]], [[BX1]];
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_copysign_f32(half %a, float %b) #0 {
  %tb = fptrunc float %b to half
  %r = call half @llvm.copysign.f16(half %a, half %tb)
  ret half %r
}

; Same as test_copysign_f32 with a double sign operand: the sign is bit 63
; (mask -9223372036854775808), shifted right by 48 into bit 15.
; CHECK-LABEL: test_copysign_f64(
; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]];
; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767;
; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48;
; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]];
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_copysign_f64(half %a, double %b) #0 {
  %tb = fptrunc double %b to half
  %r = call half @llvm.copysign.f16(half %a, half %tb)
  ret half %r
}

; copysign followed by fpext: the sign transfer is still expected in 16-bit
; integer ops, and only the final result is widened with cvt.f32.f16.
; CHECK-LABEL: test_copysign_extended(
; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]];
; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]];
; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767;
; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768;
; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]];
; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]];
; CHECK: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]];
; CHECK: st.param.f32 [func_retval0+0], [[XR]];
; CHECK: ret;
define float @test_copysign_extended(half %a, half %b) #0 {
  %r = call half @llvm.copysign.f16(half %a, half %b)
  %xr = fpext half %r to float
  ret float %xr
}

; Rounding intrinsics are expected to become a single f16->f16 cvt with an
; integer-rounding modifier: floor -> .rmi, ceil -> .rpi, trunc -> .rzi,
; rint / nearbyint / round -> .rni.
; CHECK-LABEL: test_floor(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0];
; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_floor(half %a) #0 {
  %r = call half @llvm.floor.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_ceil(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0];
; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_ceil(half %a) #0 {
  %r = call half @llvm.ceil.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_trunc(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0];
; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_trunc(half %a) #0 {
  %r = call half @llvm.trunc.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_rint(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0];
; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_rint(half %a) #0 {
  %r = call half @llvm.rint.f16(half %a)
  ret half %r
}

; CHECK-LABEL: test_nearbyint(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0];
; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_nearbyint(half %a) #0 {
  %r = call half @llvm.nearbyint.f16(half %a)
  ret half %r
}

; NOTE(review): llvm.round rounds halfway cases away from zero, whereas the
; PTX .rni modifier rounds halfway cases to even. The expectation below
; records what the backend currently emits; confirm this lowering is
; intended before relying on it.
; CHECK-LABEL: test_round(
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_round_param_0];
; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_round(half %a) #0 {
  %r = call half @llvm.round.f16(half %a)
  ret half %r
}

; fmuladd is expected to lower exactly like llvm.fma: fma.rn.f16 in the F16
; run, and widen / fma.rn.f32 / narrow in the NOF16 runs.
; CHECK-LABEL: test_fmuladd(
; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0];
; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1];
; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2];
; CHECK-F16: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_fmuladd(half %a, half %b, half %c) #0 {
  %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
  ret half %r
}

; #0 is applied to every function and declaration in this section; #1 is
; additionally applied to test_sin and test_cos.
attributes #0 = { nounwind }
attributes #1 = { "unsafe-fp-math" = "true" }