; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
; RUN:          -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
; ## FP16 is not supported by hardware.
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN:          -disable-post-ra -disable-fp-elim -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Return a constant <2 x half>. The immediate 1073757184 is 0x40003C00:
; half 2.0 (0x4000) in the upper 16 bits, half 1.0 (0x3C00) in the lower 16.
; CHECK-LABEL: test_ret_const(
; CHECK: mov.u32 [[T:%r[0-9]+]], 1073757184;
; CHECK: mov.b32 [[R:%hh[0-9]+]], [[T]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_ret_const() #0 {
  ret <2 x half> <half 1.0, half 2.0>
}

; Extract element 0 with a constant index: the low half of the unpacking mov.
; CHECK-LABEL: test_extract_0(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0];
; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_0(<2 x half> %a) #0 {
  %e = extractelement <2 x half> %a, i32 0
  ret half %e
}

; Extract element 1 with a constant index: the high half of the unpacking mov.
; CHECK-LABEL: test_extract_1(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0];
; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_1(<2 x half> %a) #0 {
  %e = extractelement <2 x half> %a, i32 1
  ret half %e
}

; Extract with a variable index: both halves are unpacked and one of them is
; picked with selp on (idx == 0).
; CHECK-LABEL: test_extract_i(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0];
; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
  %e = extractelement <2 x half> %a, i64 %idx
  ret half %e
}

; Vector fadd: one add.rn.f16x2 when f16 math is available, otherwise each
; element is promoted to f32, added, converted back and repacked.
; CHECK-LABEL: test_fadd(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1];
;
; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
  %r = fadd <2 x half> %a, %b
  ret <2 x half> %r
}

; Check that we can lower fadd with immediate arguments.
85; CHECK-LABEL: test_fadd_imm_0( 86; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0]; 87; 88; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; 89; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; 90; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]]; 91; 92; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 93; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 94; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 95; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; 96; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; 97; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 98; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 99; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 100; 101; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 102; CHECK-NEXT: ret; 103define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { 104 %r = fadd <2 x half> <half 1.0, half 2.0>, %a 105 ret <2 x half> %r 106} 107 108; CHECK-LABEL: test_fadd_imm_1( 109; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0]; 110; 111; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; 112; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; 113; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]]; 114; 115; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 116; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 117; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 118; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; 119; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; 120; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 121; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 122; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 123; 124; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 125; CHECK-NEXT: ret; 126define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { 127 %r = fadd <2 x half> %a, <half 1.0, half 2.0> 128 
ret <2 x half> %r 129} 130 131; CHECK-LABEL: test_fsub( 132; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0]; 133; 134; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1]; 135; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; 136; 137; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 138; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 139; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 140; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 141; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 142; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 143; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; 144; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; 145; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 146; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 147; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 148; 149; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 150; CHECK-NEXT: ret; 151define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { 152 %r = fsub <2 x half> %a, %b 153 ret <2 x half> %r 154} 155 156; CHECK-LABEL: test_fneg( 157; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0]; 158; 159; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0; 160; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]]; 161; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]]; 162; 163; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 164; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 165; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 166; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; 167; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]]; 168; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]]; 169; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 170; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 171; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], 
{[[R0]], [[R1]]} 172; 173; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 174; CHECK-NEXT: ret; 175define <2 x half> @test_fneg(<2 x half> %a) #0 { 176 %r = fsub <2 x half> <half 0.0, half 0.0>, %a 177 ret <2 x half> %r 178} 179 180; CHECK-LABEL: test_fmul( 181; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0]; 182; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1]; 183; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; 184; 185; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 186; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 187; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 188; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 189; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 190; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 191; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; 192; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; 193; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 194; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 195; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 196; 197; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 198; CHECK-NEXT: ret; 199define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { 200 %r = fmul <2 x half> %a, %b 201 ret <2 x half> %r 202} 203 204; CHECK-LABEL: test_fdiv( 205; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0]; 206; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1]; 207; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 208; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 209; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; 210; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; 211; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; 212; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; 213; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; 214; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], 
[[FA1]], [[FB1]]; 215; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]; 216; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]; 217; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 218; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 219; CHECK-NEXT: ret; 220define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { 221 %r = fdiv <2 x half> %a, %b 222 ret <2 x half> %r 223} 224 225; CHECK-LABEL: test_frem( 226; -- Load two 16x2 inputs and split them into f16 elements 227; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0]; 228; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1]; 229; -- Split into elements 230; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 231; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 232; -- promote to f32. 233; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; 234; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; 235; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; 236; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; 237; -- frem(a[0],b[0]). 238; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]]; 239; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]]; 240; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]]; 241; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]]; 242; -- frem(a[1],b[1]). 243; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]]; 244; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]]; 245; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]]; 246; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]]; 247; -- convert back to f16. 248; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 249; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 250; -- merge into f16x2 and return it. 
251; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 252; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 253; CHECK-NEXT: ret; 254define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { 255 %r = frem <2 x half> %a, %b 256 ret <2 x half> %r 257} 258 259; CHECK-LABEL: .func test_ldst_v2f16( 260; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; 261; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; 262; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]] 263; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]]; 264; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; 265; CHECK: ret; 266define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) { 267 %t1 = load <2 x half>, <2 x half>* %a 268 store <2 x half> %t1, <2 x half>* %b, align 16 269 ret void 270} 271 272; CHECK-LABEL: .func test_ldst_v3f16( 273; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0]; 274; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1]; 275; -- v3 is inconvenient to capture as it's lowered as ld.b64 + fair 276; number of bitshifting instructions that may change at llvm's whim. 277; So we only verify that we only issue correct number of writes using 278; correct offset, but not the values we write. 
279; CHECK-DAG: ld.u64 280; CHECK-DAG: st.u32 [%[[B]]], 281; CHECK-DAG: st.b16 [%[[B]]+4], 282; CHECK: ret; 283define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) { 284 %t1 = load <3 x half>, <3 x half>* %a 285 store <3 x half> %t1, <3 x half>* %b, align 16 286 ret void 287} 288 289; CHECK-LABEL: .func test_ldst_v4f16( 290; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0]; 291; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1]; 292; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]]; 293; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; 294; CHECK: ret; 295define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) { 296 %t1 = load <4 x half>, <4 x half>* %a 297 store <4 x half> %t1, <4 x half>* %b, align 16 298 ret void 299} 300 301; CHECK-LABEL: .func test_ldst_v8f16( 302; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0]; 303; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1]; 304; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]]; 305; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; 306; CHECK: ret; 307define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) { 308 %t1 = load <8 x half>, <8 x half>* %a 309 store <8 x half> %t1, <8 x half>* %b, align 16 310 ret void 311} 312 313declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 314 315; CHECK-LABEL: test_call( 316; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0]; 317; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1]; 318; CHECK: { 319; CHECK-DAG: .param .align 4 .b8 param0[4]; 320; CHECK-DAG: .param .align 4 .b8 param1[4]; 321; CHECK-DAG: st.param.b32 [param0+0], [[A]]; 322; CHECK-DAG: st.param.b32 [param1+0], [[B]]; 323; CHECK-DAG: .param .align 4 .b8 retval0[4]; 324; CHECK: call.uni (retval0), 325; CHECK-NEXT: test_callee, 326; CHECK: ); 327; CHECK-NEXT: 
ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 328; CHECK-NEXT: } 329; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 330; CHECK-NEXT: ret; 331define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { 332 %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) 333 ret <2 x half> %r 334} 335 336; CHECK-LABEL: test_call_flipped( 337; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0]; 338; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1]; 339; CHECK: { 340; CHECK-DAG: .param .align 4 .b8 param0[4]; 341; CHECK-DAG: .param .align 4 .b8 param1[4]; 342; CHECK-DAG: st.param.b32 [param0+0], [[B]]; 343; CHECK-DAG: st.param.b32 [param1+0], [[A]]; 344; CHECK-DAG: .param .align 4 .b8 retval0[4]; 345; CHECK: call.uni (retval0), 346; CHECK-NEXT: test_callee, 347; CHECK: ); 348; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 349; CHECK-NEXT: } 350; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 351; CHECK-NEXT: ret; 352define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { 353 %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 354 ret <2 x half> %r 355} 356 357; CHECK-LABEL: test_tailcall_flipped( 358; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0]; 359; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1]; 360; CHECK: { 361; CHECK-DAG: .param .align 4 .b8 param0[4]; 362; CHECK-DAG: .param .align 4 .b8 param1[4]; 363; CHECK-DAG: st.param.b32 [param0+0], [[B]]; 364; CHECK-DAG: st.param.b32 [param1+0], [[A]]; 365; CHECK-DAG: .param .align 4 .b8 retval0[4]; 366; CHECK: call.uni (retval0), 367; CHECK-NEXT: test_callee, 368; CHECK: ); 369; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 370; CHECK-NEXT: } 371; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 372; CHECK-NEXT: ret; 373define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { 374 %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 375 ret <2 x 
half> %r 376} 377 378; CHECK-LABEL: test_select( 379; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0]; 380; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1]; 381; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] 382; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; 383; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]]; 384; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 385; CHECK-NEXT: ret; 386define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { 387 %r = select i1 %c, <2 x half> %a, <2 x half> %b 388 ret <2 x half> %r 389} 390 391; CHECK-LABEL: test_select_cc( 392; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0]; 393; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1]; 394; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2]; 395; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3]; 396; 397; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] 398; 399; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 400; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] 401; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; 402; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; 403; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; 404; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]]; 405; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] 406; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] 407; 408; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 409; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 410; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; 411; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; 412; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 413; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 414; CHECK-NEXT: ret; 415define <2 x half> @test_select_cc(<2 x 
half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { 416 %cc = fcmp une <2 x half> %c, %d 417 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 418 ret <2 x half> %r 419} 420 421; CHECK-LABEL: test_select_cc_f32_f16( 422; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0]; 423; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1]; 424; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2]; 425; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3]; 426; 427; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] 428; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 429; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] 430; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; 431; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; 432; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; 433; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]]; 434; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] 435; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] 436; 437; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]]; 438; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]]; 439; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; 440; CHECK-NEXT: ret; 441define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, 442 <2 x half> %c, <2 x half> %d) #0 { 443 %cc = fcmp une <2 x half> %c, %d 444 %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b 445 ret <2 x float> %r 446} 447 448; CHECK-LABEL: test_select_cc_f16_f32( 449; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0]; 450; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1]; 451; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2]; 452; CHECK-DAG: 
ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3]; 453; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]] 454; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]] 455; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 456; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 457; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; 458; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; 459; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 460; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 461; CHECK-NEXT: ret; 462define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, 463 <2 x float> %c, <2 x float> %d) #0 { 464 %cc = fcmp une <2 x float> %c, %d 465 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 466 ret <2 x half> %r 467} 468 469; CHECK-LABEL: test_fcmp_une( 470; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0]; 471; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1]; 472; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 473; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 474; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 475; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 476; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 477; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 478; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 479; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 480; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 481; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 482; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 483; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 484; CHECK-NEXT: ret; 485define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { 486 %r = fcmp une <2 x half> %a, %b 487 ret <2 x i1> %r 488} 489 490; CHECK-LABEL: test_fcmp_ueq( 491; 
CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0]; 492; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1]; 493; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 494; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 495; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 496; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 497; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 498; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 499; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 500; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 501; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 502; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 503; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 504; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 505; CHECK-NEXT: ret; 506define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { 507 %r = fcmp ueq <2 x half> %a, %b 508 ret <2 x i1> %r 509} 510 511; CHECK-LABEL: test_fcmp_ugt( 512; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0]; 513; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1]; 514; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 515; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 516; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 517; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 518; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 519; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 520; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 521; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 522; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 523; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 524; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 525; CHECK-NEXT: st.param.v2.b8 
[func_retval0+0], {[[R0]], [[R1]]}; 526; CHECK-NEXT: ret; 527define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { 528 %r = fcmp ugt <2 x half> %a, %b 529 ret <2 x i1> %r 530} 531 532; CHECK-LABEL: test_fcmp_uge( 533; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0]; 534; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1]; 535; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 536; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 537; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 538; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 539; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 540; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 541; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 542; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 543; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 544; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 545; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 546; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 547; CHECK-NEXT: ret; 548define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { 549 %r = fcmp uge <2 x half> %a, %b 550 ret <2 x i1> %r 551} 552 553; CHECK-LABEL: test_fcmp_ult( 554; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0]; 555; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1]; 556; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 557; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 558; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 559; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 560; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 561; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 562; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 563; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], 
[[FB0]] 564; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 565; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 566; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 567; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 568; CHECK-NEXT: ret; 569define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { 570 %r = fcmp ult <2 x half> %a, %b 571 ret <2 x i1> %r 572} 573 574; CHECK-LABEL: test_fcmp_ule( 575; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0]; 576; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1]; 577; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 578; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 579; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 580; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 581; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 582; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 583; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 584; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 585; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 586; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 587; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 588; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 589; CHECK-NEXT: ret; 590define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { 591 %r = fcmp ule <2 x half> %a, %b 592 ret <2 x i1> %r 593} 594 595 596; CHECK-LABEL: test_fcmp_uno( 597; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0]; 598; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1]; 599; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 600; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 601; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 602; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 603; 
CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 604; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 605; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 606; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 607; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 608; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 609; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 610; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 611; CHECK-NEXT: ret; 612define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { 613 %r = fcmp uno <2 x half> %a, %b 614 ret <2 x i1> %r 615} 616 617; CHECK-LABEL: test_fcmp_one( 618; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0]; 619; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1]; 620; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 621; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 622; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 623; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 624; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 625; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 626; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 627; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 628; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 629; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 630; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 631; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 632; CHECK-NEXT: ret; 633define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { 634 %r = fcmp one <2 x half> %a, %b 635 ret <2 x i1> %r 636} 637 638; CHECK-LABEL: test_fcmp_oeq( 639; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0]; 640; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1]; 641; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], 
[[A]], [[B]] 642; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 643; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 644; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 645; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 646; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 647; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 648; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 649; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 650; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 651; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 652; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 653; CHECK-NEXT: ret; 654define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 { 655 %r = fcmp oeq <2 x half> %a, %b 656 ret <2 x i1> %r 657} 658 659; CHECK-LABEL: test_fcmp_ogt( 660; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0]; 661; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1]; 662; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 663; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 664; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 665; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 666; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 667; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 668; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 669; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 670; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 671; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 672; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 673; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 674; CHECK-NEXT: ret; 675define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { 676 %r = fcmp ogt <2 x half> %a, %b 677 ret <2 x i1> %r 678} 679 680; CHECK-LABEL: 
test_fcmp_oge( 681; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0]; 682; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1]; 683; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 684; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 685; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 686; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 687; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 688; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 689; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 690; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 691; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 692; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 693; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 694; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 695; CHECK-NEXT: ret; 696define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { 697 %r = fcmp oge <2 x half> %a, %b 698 ret <2 x i1> %r 699} 700 701; CHECK-LABEL: test_fcmp_olt( 702; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0]; 703; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1]; 704; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 705; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 706; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 707; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 708; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 709; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 710; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 711; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 712; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 713; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 714; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 715; CHECK-NEXT: 
st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 716; CHECK-NEXT: ret; 717define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 { 718 %r = fcmp olt <2 x half> %a, %b 719 ret <2 x i1> %r 720} 721 722; CHECK-LABEL: test_fcmp_ole( 723; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0]; 724; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1]; 725; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 726; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 727; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 728; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 729; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 730; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 731; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 732; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 733; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 734; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 735; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 736; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 737; CHECK-NEXT: ret; 738define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 { 739 %r = fcmp ole <2 x half> %a, %b 740 ret <2 x i1> %r 741} 742 743; CHECK-LABEL: test_fcmp_ord( 744; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0]; 745; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1]; 746; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 747; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 748; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 749; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 750; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 751; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 752; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 753; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]], 
[[FA0]], [[FB0]] 754; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 755; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 756; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 757; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 758; CHECK-NEXT: ret; 759define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { 760 %r = fcmp ord <2 x half> %a, %b 761 ret <2 x i1> %r 762} 763 764; CHECK-LABEL: test_fptosi_i32( 765; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0]; 766; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 767; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]]; 768; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]]; 769; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} 770; CHECK: ret; 771define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { 772 %r = fptosi <2 x half> %a to <2 x i32> 773 ret <2 x i32> %r 774} 775 776; CHECK-LABEL: test_fptosi_i64( 777; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0]; 778; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 779; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]]; 780; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]]; 781; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} 782; CHECK: ret; 783define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { 784 %r = fptosi <2 x half> %a to <2 x i64> 785 ret <2 x i64> %r 786} 787 788; CHECK-LABEL: test_fptoui_2xi32( 789; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0]; 790; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 791; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]]; 792; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]]; 793; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} 794; CHECK: ret; 795define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { 796 %r = fptoui <2 x half> %a to <2 x i32> 797 ret <2 x i32> %r 798} 799 800; CHECK-LABEL: test_fptoui_2xi64( 801; CHECK: ld.param.b32 [[A:%hh[0-9]+]], 
[test_fptoui_2xi64_param_0]; 802; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 803; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]]; 804; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]]; 805; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} 806; CHECK: ret; 807define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { 808 %r = fptoui <2 x half> %a to <2 x i64> 809 ret <2 x i64> %r 810} 811 812; CHECK-LABEL: test_uitofp_2xi32( 813; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0]; 814; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]]; 815; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]]; 816; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 817; CHECK: st.param.b32 [func_retval0+0], [[R]]; 818; CHECK: ret; 819define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { 820 %r = uitofp <2 x i32> %a to <2 x half> 821 ret <2 x half> %r 822} 823 824; CHECK-LABEL: test_uitofp_2xi64( 825; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0]; 826; CHECK-DAG: cvt.rn.f32.u64 [[F0:%f[0-9]+]], [[A0]]; 827; CHECK-DAG: cvt.rn.f32.u64 [[F1:%f[0-9]+]], [[A1]]; 828; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]]; 829; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]]; 830; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 831; CHECK: st.param.b32 [func_retval0+0], [[R]]; 832; CHECK: ret; 833define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { 834 %r = uitofp <2 x i64> %a to <2 x half> 835 ret <2 x half> %r 836} 837 838; CHECK-LABEL: test_sitofp_2xi32( 839; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0]; 840; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]]; 841; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]]; 842; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 843; CHECK: st.param.b32 [func_retval0+0], [[R]]; 844; CHECK: ret; 845define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { 846 %r = sitofp <2 x i32> %a to <2 x half> 847 ret 
<2 x half> %r 848} 849 850; CHECK-LABEL: test_sitofp_2xi64( 851; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0]; 852; CHECK-DAG: cvt.rn.f32.s64 [[F0:%f[0-9]+]], [[A0]]; 853; CHECK-DAG: cvt.rn.f32.s64 [[F1:%f[0-9]+]], [[A1]]; 854; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[F0]]; 855; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[F1]]; 856; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 857; CHECK: st.param.b32 [func_retval0+0], [[R]]; 858; CHECK: ret; 859define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { 860 %r = sitofp <2 x i64> %a to <2 x half> 861 ret <2 x half> %r 862} 863 864; CHECK-LABEL: test_uitofp_2xi32_fadd( 865; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0]; 866; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; 867; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]]; 868; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]]; 869 870; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} 871; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; 872; 873; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 874; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 875; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 876; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] 877; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] 878; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; 879; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; 880; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 881; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 882; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 883; 884; CHECK: st.param.b32 [func_retval0+0], [[R]]; 885; CHECK: ret; 886define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { 887 %c = uitofp <2 x i32> %a to <2 x half> 888 %r = fadd <2 x half> %b, %c 889 ret <2 x half> %r 890} 891 892; 
CHECK-LABEL: test_sitofp_2xi32_fadd( 893; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0]; 894; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; 895; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]]; 896; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]]; 897; 898; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} 899; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; 900; 901; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 902; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 903; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 904; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] 905; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] 906; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; 907; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; 908; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 909; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 910; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 911; 912; CHECK: st.param.b32 [func_retval0+0], [[R]]; 913; CHECK: ret; 914define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { 915 %c = sitofp <2 x i32> %a to <2 x half> 916 %r = fadd <2 x half> %b, %c 917 ret <2 x half> %r 918} 919 920; CHECK-LABEL: test_fptrunc_2xfloat( 921; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0]; 922; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]]; 923; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]]; 924; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 925; CHECK: st.param.b32 [func_retval0+0], [[R]]; 926; CHECK: ret; 927define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { 928 %r = fptrunc <2 x float> %a to <2 x half> 929 ret <2 x half> %r 930} 931 932; CHECK-LABEL: test_fptrunc_2xdouble( 933; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, 
[test_fptrunc_2xdouble_param_0]; 934; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]]; 935; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]]; 936; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 937; CHECK: st.param.b32 [func_retval0+0], [[R]]; 938; CHECK: ret; 939define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { 940 %r = fptrunc <2 x double> %a to <2 x half> 941 ret <2 x half> %r 942} 943 944; CHECK-LABEL: test_fpext_2xfloat( 945; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0]; 946; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 947; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]]; 948; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]]; 949; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; 950; CHECK: ret; 951define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { 952 %r = fpext <2 x half> %a to <2 x float> 953 ret <2 x float> %r 954} 955 956; CHECK-LABEL: test_fpext_2xdouble( 957; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0]; 958; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 959; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]]; 960; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]]; 961; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]}; 962; CHECK: ret; 963define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { 964 %r = fpext <2 x half> %a to <2 x double> 965 ret <2 x double> %r 966} 967 968 969; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16( 970; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0]; 971; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]] 972; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16 973; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]] 974; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]} 975; CHECK: ret; 976define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 { 977 %r = bitcast <2 x half> %a to <2 x i16> 978 ret <2 x i16> %r 979} 980 981; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf( 982; CHECK: 
ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0]; 983; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]]; 984; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]]; 985; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16; 986; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]]; 987; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]]; 988; CHECK: st.param.b32 [func_retval0+0], [[R]]; 989; CHECK: ret; 990define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { 991 %r = bitcast <2 x i16> %a to <2 x half> 992 ret <2 x half> %r 993} 994 995 996declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0 997declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0 998declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0 999declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0 1000declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0 1001declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0 1002declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0 1003declare <2 x half> @llvm.log.f16(<2 x half> %a) #0 1004declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0 1005declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0 1006declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 1007declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0 1008declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0 1009declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0 1010declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0 1011declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0 1012declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0 1013declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0 1014declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0 1015declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0 1016declare <2 x half> @llvm.round.f16(<2 x half> %a) #0 1017declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 1018 1019; CHECK-LABEL: test_sqrt( 1020; 
CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0]; 1021; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1022; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1023; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1024; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]]; 1025; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1026; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1027; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1028; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1029; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1030; CHECK: ret; 1031define <2 x half> @test_sqrt(<2 x half> %a) #0 { 1032 %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a) 1033 ret <2 x half> %r 1034} 1035 1036;;; Can't do this yet: requires libcall. 1037; XCHECK-LABEL: test_powi( 1038;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 { 1039; %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) 1040; ret <2 x half> %r 1041;} 1042 1043; CHECK-LABEL: test_sin( 1044; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0]; 1045; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1046; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1047; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1048; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; 1049; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1050; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1051; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1052; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1053; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1054; CHECK: ret; 1055define <2 x half> @test_sin(<2 x half> %a) #0 #1 { 1056 %r = call <2 x half> @llvm.sin.f16(<2 x half> %a) 1057 ret <2 x half> %r 1058} 1059 1060; CHECK-LABEL: test_cos( 1061; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0]; 1062; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1063; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1064; CHECK-DAG: cvt.f32.f16 
[[AF1:%f[0-9]+]], [[A1]]; 1065; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; 1066; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1067; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1068; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1069; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1070; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1071; CHECK: ret; 1072define <2 x half> @test_cos(<2 x half> %a) #0 #1 { 1073 %r = call <2 x half> @llvm.cos.f16(<2 x half> %a) 1074 ret <2 x half> %r 1075} 1076 1077;;; Can't do this yet: requires libcall. 1078; XCHECK-LABEL: test_pow( 1079;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 { 1080; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) 1081; ret <2 x half> %r 1082;} 1083 1084;;; Can't do this yet: requires libcall. 1085; XCHECK-LABEL: test_exp( 1086;define <2 x half> @test_exp(<2 x half> %a) #0 { 1087; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a) 1088; ret <2 x half> %r 1089;} 1090 1091;;; Can't do this yet: requires libcall. 1092; XCHECK-LABEL: test_exp2( 1093;define <2 x half> @test_exp2(<2 x half> %a) #0 { 1094; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a) 1095; ret <2 x half> %r 1096;} 1097 1098;;; Can't do this yet: requires libcall. 1099; XCHECK-LABEL: test_log( 1100;define <2 x half> @test_log(<2 x half> %a) #0 { 1101; %r = call <2 x half> @llvm.log.f16(<2 x half> %a) 1102; ret <2 x half> %r 1103;} 1104 1105;;; Can't do this yet: requires libcall. 1106; XCHECK-LABEL: test_log10( 1107;define <2 x half> @test_log10(<2 x half> %a) #0 { 1108; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a) 1109; ret <2 x half> %r 1110;} 1111 1112;;; Can't do this yet: requires libcall. 
1113; XCHECK-LABEL: test_log2( 1114;define <2 x half> @test_log2(<2 x half> %a) #0 { 1115; %r = call <2 x half> @llvm.log2.f16(<2 x half> %a) 1116; ret <2 x half> %r 1117;} 1118 1119; CHECK-LABEL: test_fma( 1120; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0]; 1121; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1]; 1122; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2]; 1123; 1124; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; 1125; 1126; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1127; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1128; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 1129; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 1130; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 1131; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] 1132; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 1133; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 1134; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] 1135; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; 1136; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; 1137; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 1138; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 1139; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1140 1141; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1142; CHECK: ret 1143define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { 1144 %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) 1145 ret <2 x half> %r 1146} 1147 1148; CHECK-LABEL: test_fabs( 1149; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0]; 1150; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1151; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1152; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1153; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], 
[[AF0]]; 1154; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1155; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1156; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1157; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1158; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1159; CHECK: ret; 1160define <2 x half> @test_fabs(<2 x half> %a) #0 { 1161 %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a) 1162 ret <2 x half> %r 1163} 1164 1165; CHECK-LABEL: test_minnum( 1166; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0]; 1167; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1]; 1168; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1169; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1170; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1171; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1172; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; 1173; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; 1174; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; 1175; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; 1176; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1177; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1178; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1179; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1180; CHECK: ret; 1181define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { 1182 %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) 1183 ret <2 x half> %r 1184} 1185 1186; CHECK-LABEL: test_maxnum( 1187; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0]; 1188; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1]; 1189; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1190; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1191; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1192; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1193; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; 1194; CHECK-DAG: 
cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; 1195; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; 1196; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; 1197; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1198; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1199; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1200; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1201; CHECK: ret; 1202define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { 1203 %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) 1204 ret <2 x half> %r 1205} 1206 1207; CHECK-LABEL: test_copysign( 1208; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0]; 1209; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1]; 1210; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1211; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1212; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1213; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1214; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; 1215; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; 1216; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; 1217; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; 1218; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; 1219; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; 1220; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; 1221; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; 1222; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1223; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1224; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1225; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1226; CHECK: ret; 1227define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { 1228 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 1229 ret <2 x half> %r 1230} 1231 1232; CHECK-LABEL: test_copysign_f32( 1233; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0]; 1234; 
CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1]; 1235; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1236; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1237; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1238; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; 1239; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]]; 1240; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; 1241; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; 1242; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648; 1243; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648; 1244; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16; 1245; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16; 1246; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]]; 1247; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]]; 1248; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; 1249; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; 1250; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1251; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1252; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1253; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1254; CHECK: ret; 1255define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { 1256 %tb = fptrunc <2 x float> %b to <2 x half> 1257 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 1258 ret <2 x half> %r 1259} 1260 1261; CHECK-LABEL: test_copysign_f64( 1262; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0]; 1263; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1]; 1264; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1265; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1266; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1267; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; 1268; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]]; 1269; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; 1270; CHECK-DAG: 
and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; 1271; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808; 1272; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808; 1273; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48; 1274; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48; 1275; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]]; 1276; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; 1277; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; 1278; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; 1279; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1280; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1281; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1282; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1283; CHECK: ret; 1284define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { 1285 %tb = fptrunc <2 x double> %b to <2 x half> 1286 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 1287 ret <2 x half> %r 1288} 1289 1290; CHECK-LABEL: test_copysign_extended( 1291; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0]; 1292; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1]; 1293; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1294; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1295; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1296; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1297; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; 1298; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; 1299; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; 1300; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; 1301; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; 1302; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; 1303; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; 1304; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; 1305; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1306; CHECK-DAG: mov.b16 
[[R1:%h[0-9]+]], [[RS1]]; 1307; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1308; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]] 1309; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; 1310; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; 1311; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; 1312; CHECK: ret; 1313define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { 1314 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 1315 %xr = fpext <2 x half> %r to <2 x float> 1316 ret <2 x float> %xr 1317} 1318 1319; CHECK-LABEL: test_floor( 1320; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0]; 1321; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1322; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1323; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1324; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1325; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1326; CHECK: ret; 1327define <2 x half> @test_floor(<2 x half> %a) #0 { 1328 %r = call <2 x half> @llvm.floor.f16(<2 x half> %a) 1329 ret <2 x half> %r 1330} 1331 1332; CHECK-LABEL: test_ceil( 1333; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0]; 1334; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1335; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1336; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1337; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1338; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1339; CHECK: ret; 1340define <2 x half> @test_ceil(<2 x half> %a) #0 { 1341 %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a) 1342 ret <2 x half> %r 1343} 1344 1345; CHECK-LABEL: test_trunc( 1346; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0]; 1347; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1348; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1349; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1350; CHECK: mov.b32 [[R:%hh[0-9]+]], 
{[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_trunc(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
  ret <2 x half> %r
}

; rint on a packed pair of halves. The expected code unpacks the 32-bit
; register into two scalar halves, rounds each one with cvt.rni.f16.f16 and
; packs the two results back before storing the return value.
; CHECK-LABEL: test_rint(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_rint_param_0];
; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_rint(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
  ret <2 x half> %r
}

; nearbyint on a packed pair of halves. The expected instruction sequence is
; the same unpack / per-lane cvt.rni.f16.f16 / repack shape as for rint.
; CHECK-LABEL: test_nearbyint(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_nearbyint_param_0];
; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
  ret <2 x half> %r
}

; round on a packed pair of halves. The expected instruction sequence is the
; same unpack / per-lane cvt.rni.f16.f16 / repack shape as for rint.
; NOTE(review): llvm.round is specified to round halfway cases away from zero,
; whereas the PTX .rni modifier rounds halfway cases to even. These patterns
; mirror what the backend emits; confirm that this lowering is intended.
; CHECK-LABEL: test_round(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_round_param_0];
; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_round(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
  ret <2 x half> %r
}

; fmuladd on packed halves.
; With the F16 prefix a single packed fma.rn.f16x2 is expected.
; With the NOF16 prefix each operand is unpacked, every lane is widened to
; f32, one fma.rn.f32 is issued per lane, and the two results are narrowed
; back to f16 and repacked.
; CHECK-LABEL: test_fmuladd(
; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fmuladd_param_0];
; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fmuladd_param_1];
; CHECK-DAG:  ld.param.b32    [[C:%hh[0-9]+]], [test_fmuladd_param_2];
;
; CHECK-F16:        fma.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
;
; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG:  mov.b32        {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
; Lane 1 of the third operand binds FC1 from C1, so that every operand of the
; lane-1 fma below refers to a variable that has been defined.
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG:  fma.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG:  fma.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
  %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
  ret <2 x half> %r
}

; Swap the two lanes of a <2 x half>. The expected code unpacks the vector
; into two halves and repacks them in the opposite order. Register names are
; matched literally here rather than through pattern variables.
; CHECK-LABEL: test_shufflevector(
; CHECK:      mov.b32 {%h1, %h2}, %hh1;
; CHECK:      mov.b32 %hh2, {%h2, %h1};
define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
  %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x half> %s
}

; Replace lane 1 of a <2 x half> with a scalar. The expected code extracts
; lane 0 of the vector (the other lane goes to %tmp_hi) and packs it together
; with %h1 in the high position. Register names are matched literally.
; NOTE(review): %h1 is presumably the register holding %x; these patterns do
; not check the parameter load, so confirm against the generated PTX.
; CHECK-LABEL: test_insertelement(
; CHECK:      mov.b32 {%h2, %tmp_hi}, %hh1;
; CHECK:      mov.b32 %hh2, {%h2, %h1};
define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
  %i = insertelement <2 x half> %a, half %x, i64 1
  ret <2 x half> %i
}

; Attribute groups referenced by the function definitions in this file.
; #0 is the group attached to every test function defined above.
attributes #0 = { nounwind }
; #1 sets the "unsafe-fp-math" string attribute to "true"; no function in
; this part of the file refers to it.
attributes #1 = { "unsafe-fp-math" = "true" }