1; ## Full FP16 support enabled by default.
2; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
3; RUN:          -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
4; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
5; ## FP16 support explicitly disabled.
6; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
7; RUN:          -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
8; RUN:           -verify-machineinstrs \
9; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
10; ## FP16 is not supported by hardware.
11; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
12; RUN:          -disable-post-ra -disable-fp-elim -verify-machineinstrs \
13; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
14
15target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
16
17; CHECK-LABEL: test_ret_const(
18; CHECK:      mov.b16         [[R:%h[0-9]+]], 0x3C00;
19; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
20; CHECK-NEXT: ret;
; Returns the half constant 1.0, spelled here by its IEEE bit pattern.
define half @test_ret_const() #0 {
  ret half 0xH3C00
}
24
25; CHECK-LABEL: test_fadd(
26; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fadd_param_0];
27; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_param_1];
28; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
29; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
30; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
31; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
32; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
33; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
34; CHECK-NEXT: ret;
; Scalar half addition of the two arguments.
define half @test_fadd(half %a, half %b) #0 {
  %sum = fadd half %a, %b
  ret half %sum
}
39
40; CHECK-LABEL: test_fadd_v1f16(
41; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
42; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
43; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
44; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
45; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
46; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
47; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
48; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
49; CHECK-NEXT: ret;
; Same addition as the scalar case, but on single-element half vectors.
define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
  %vsum = fadd <1 x half> %a, %b
  ret <1 x half> %vsum
}
54
55; Check that we can lower fadd with immediate arguments.
56; CHECK-LABEL: test_fadd_imm_0(
57; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
58; CHECK-F16-DAG:    mov.b16        [[A:%h[0-9]+]], 0x3C00;
59; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[B]], [[A]];
60; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
61; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
62; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
63; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
64; CHECK-NEXT: ret;
; Immediate 1.0 as the first fadd operand.
define half @test_fadd_imm_0(half %b) #0 {
  %sum = fadd half 1.0, %b
  ret half %sum
}
69
70; CHECK-LABEL: test_fadd_imm_1(
71; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
72; CHECK-F16-DAG:    mov.b16        [[A:%h[0-9]+]], 0x3C00;
73; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[B]], [[A]];
74; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
75; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
76; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
77; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
78; CHECK-NEXT: ret;
; Immediate 1.0 as the second fadd operand.
define half @test_fadd_imm_1(half %a) #0 {
  %sum = fadd half %a, 1.0
  ret half %sum
}
83
84; CHECK-LABEL: test_fsub(
85; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fsub_param_0];
86; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fsub_param_1];
87; CHECK-F16-NEXT:   sub.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
88; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
89; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
90; CHECK-NOF16-NEXT: sub.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
91; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
92; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
93; CHECK-NEXT: ret;
; Scalar half subtraction: %a - %b.
define half @test_fsub(half %a, half %b) #0 {
  %diff = fsub half %a, %b
  ret half %diff
}
98
99; CHECK-LABEL: test_fneg(
100; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fneg_param_0];
101; CHECK-F16-NEXT:   mov.b16        [[Z:%h[0-9]+]], 0x0000
102; CHECK-F16-NEXT:   sub.rn.f16     [[R:%h[0-9]+]], [[Z]], [[A]];
103; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
104; CHECK-NOF16-DAG:  mov.f32        [[Z:%f[0-9]+]], 0f00000000;
105; CHECK-NOF16-NEXT: sub.rn.f32     [[R32:%f[0-9]+]], [[Z]], [[A32]];
106; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
107; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
108; CHECK-NEXT: ret;
; Negation written as a subtraction from 0.0.
define half @test_fneg(half %a) #0 {
  %neg = fsub half 0.0, %a
  ret half %neg
}
113
114; CHECK-LABEL: test_fmul(
115; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fmul_param_0];
116; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fmul_param_1];
117; CHECK-F16-NEXT: mul.rn.f16      [[R:%h[0-9]+]], [[A]], [[B]];
118; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
119; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
120; CHECK-NOF16-NEXT: mul.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
121; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
122; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
123; CHECK-NEXT: ret;
; Scalar half multiplication of the two arguments.
define half @test_fmul(half %a, half %b) #0 {
  %prod = fmul half %a, %b
  ret half %prod
}
128
129; CHECK-LABEL: test_fdiv(
130; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fdiv_param_0];
131; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fdiv_param_1];
132; CHECK-DAG:  cvt.f32.f16     [[F0:%f[0-9]+]], [[A]];
133; CHECK-DAG:  cvt.f32.f16     [[F1:%f[0-9]+]], [[B]];
134; CHECK-NEXT: div.rn.f32      [[FR:%f[0-9]+]], [[F0]], [[F1]];
135; CHECK-NEXT: cvt.rn.f16.f32  [[R:%h[0-9]+]], [[FR]];
136; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
137; CHECK-NEXT: ret;
; Scalar half division: %a / %b.
define half @test_fdiv(half %a, half %b) #0 {
  %quot = fdiv half %a, %b
  ret half %quot
}
142
143; CHECK-LABEL: test_frem(
144; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_frem_param_0];
145; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_frem_param_1];
146; CHECK-DAG:  cvt.f32.f16     [[FA:%f[0-9]+]], [[A]];
147; CHECK-DAG:  cvt.f32.f16     [[FB:%f[0-9]+]], [[B]];
148; CHECK-NEXT: div.rn.f32      [[D:%f[0-9]+]], [[FA]], [[FB]];
149; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
150; CHECK-NEXT: mul.f32         [[RI:%f[0-9]+]], [[DI]], [[FB]];
151; CHECK-NEXT: sub.f32         [[RF:%f[0-9]+]], [[FA]], [[RI]];
152; CHECK-NEXT: cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
153; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
154; CHECK-NEXT: ret;
; Scalar half remainder of %a by %b.
define half @test_frem(half %a, half %b) #0 {
  %rem = frem half %a, %b
  ret half %rem
}
159
160; CHECK-LABEL: test_store(
161; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_store_param_0];
162; CHECK-DAG:  ld.param.u64    %[[PTR:rd[0-9]+]], [test_store_param_1];
163; CHECK-NEXT: st.b16          [%[[PTR]]], [[A]];
164; CHECK-NEXT: ret;
; Stores the half argument through the pointer argument.
; The alignment is spelled out; 2 bytes is the default ABI alignment for half.
define void @test_store(half %a, half* %b) #0 {
  store half %a, half* %b, align 2
  ret void
}
169
170; CHECK-LABEL: test_load(
171; CHECK:      ld.param.u64    %[[PTR:rd[0-9]+]], [test_load_param_0];
172; CHECK-NEXT: ld.b16          [[R:%h[0-9]+]], [%[[PTR]]];
173; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
174; CHECK-NEXT: ret;
; Loads a half through the pointer argument and returns it.
define half @test_load(half* %a) #0 {
  %val = load half, half* %a
  ret half %val
}
179
180; CHECK-LABEL: .visible .func test_halfp0a1(
181; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
182; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
183; CHECK-DAG: ld.u8        [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
184; CHECK-DAG: st.u8        [%[[TO]]], [[B0]]
185; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
186; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
187; CHECK: ret
; Copies one half from %from to %to, with both accesses marked align 1.
define void @test_halfp0a1(half* noalias readonly %from, half* %to) {
  %val = load half, half* %from, align 1
  store half %val, half* %to, align 1
  ret void
}
193
; External callee taking two halves and returning a half; used by the call tests.
declare half @test_callee(half, half) #0
195
196; CHECK-LABEL: test_call(
197; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_call_param_0];
198; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_call_param_1];
199; CHECK:      {
200; CHECK-DAG:  .param .b32 param0;
201; CHECK-DAG:  .param .b32 param1;
202; CHECK-DAG:  st.param.b16    [param0+0], [[A]];
203; CHECK-DAG:  st.param.b16    [param1+0], [[B]];
204; CHECK-DAG:  .param .b32 retval0;
205; CHECK:      call.uni (retval0),
206; CHECK-NEXT:        test_callee,
207; CHECK:      );
208; CHECK-NEXT: ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
209; CHECK-NEXT: }
210; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
211; CHECK-NEXT: ret;
; Forwards both arguments to @test_callee in their original order.
define half @test_call(half %a, half %b) #0 {
  %res = call half @test_callee(half %a, half %b)
  ret half %res
}
216
217; CHECK-LABEL: test_call_flipped(
218; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_call_flipped_param_0];
219; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_call_flipped_param_1];
220; CHECK:      {
221; CHECK-DAG:  .param .b32 param0;
222; CHECK-DAG:  .param .b32 param1;
223; CHECK-DAG:  st.param.b16    [param0+0], [[B]];
224; CHECK-DAG:  st.param.b16    [param1+0], [[A]];
225; CHECK-DAG:  .param .b32 retval0;
226; CHECK:      call.uni (retval0),
227; CHECK-NEXT:        test_callee,
228; CHECK:      );
229; CHECK-NEXT: ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
230; CHECK-NEXT: }
231; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
232; CHECK-NEXT: ret;
; Calls @test_callee with the two arguments swapped.
define half @test_call_flipped(half %a, half %b) #0 {
  %res = call half @test_callee(half %b, half %a)
  ret half %res
}
237
238; CHECK-LABEL: test_tailcall_flipped(
239; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
240; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
241; CHECK:      {
242; CHECK-DAG:  .param .b32 param0;
243; CHECK-DAG:  .param .b32 param1;
244; CHECK-DAG:  st.param.b16    [param0+0], [[B]];
245; CHECK-DAG:  st.param.b16    [param1+0], [[A]];
246; CHECK-DAG:  .param .b32 retval0;
247; CHECK:      call.uni (retval0),
248; CHECK-NEXT:        test_callee,
249; CHECK:      );
250; CHECK-NEXT: ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
251; CHECK-NEXT: }
252; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
253; CHECK-NEXT: ret;
; Same swapped-argument call, but marked as a tail call.
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %res = tail call half @test_callee(half %b, half %a)
  ret half %res
}
258
259; CHECK-LABEL: test_select(
260; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_select_param_0];
261; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_select_param_1];
262; CHECK-DAG:  setp.eq.b16     [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
263; CHECK-NEXT: selp.b16        [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
264; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
265; CHECK-NEXT: ret;
; Picks %a when the i1 condition %c is true, otherwise %b.
define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
  %sel = select i1 %c, half %a, half %b
  ret half %sel
}
270
271; CHECK-LABEL: test_select_cc(
272; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_select_cc_param_0];
273; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_select_cc_param_1];
274; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_select_cc_param_2];
275; CHECK-DAG:  ld.param.b16    [[D:%h[0-9]+]], [test_select_cc_param_3];
276; CHECK-F16:  setp.neu.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
277; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
278; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
279; CHECK-NOF16: setp.neu.f32    [[PRED:%p[0-9]+]], [[CF]], [[DF]]
280; CHECK:      selp.b16        [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
281; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
282; CHECK-NEXT: ret;
; Selects between halves %a and %b on an unordered-or-not-equal compare of %c and %d.
define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
  %ne = fcmp une half %c, %d
  %sel = select i1 %ne, half %a, half %b
  ret half %sel
}
288
289; CHECK-LABEL: test_select_cc_f32_f16(
290; CHECK-DAG:  ld.param.f32    [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
291; CHECK-DAG:  ld.param.f32    [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
292; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
293; CHECK-DAG:  ld.param.b16    [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
294; CHECK-F16:  setp.neu.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
295; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
296; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
297; CHECK-NOF16: setp.neu.f32    [[PRED:%p[0-9]+]], [[CF]], [[DF]]
298; CHECK-NEXT: selp.f32        [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
299; CHECK-NEXT: st.param.f32    [func_retval0+0], [[R]];
300; CHECK-NEXT: ret;
; Float select driven by a half compare (une) of %c and %d.
define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
  %ne = fcmp une half %c, %d
  %sel = select i1 %ne, float %a, float %b
  ret float %sel
}
306
307; CHECK-LABEL: test_select_cc_f16_f32(
308; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
309; CHECK-DAG:  ld.param.f32    [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
310; CHECK-DAG:  ld.param.f32    [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
311; CHECK-DAG:  setp.neu.f32    [[PRED:%p[0-9]+]], [[C]], [[D]]
312; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
313; CHECK-NEXT: selp.b16        [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
314; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
315; CHECK-NEXT: ret;
; Half select driven by a float compare (une) of %c and %d.
define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
  %ne = fcmp une float %c, %d
  %sel = select i1 %ne, half %a, half %b
  ret half %sel
}
321
322; CHECK-LABEL: test_fcmp_une(
323; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_une_param_0];
324; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_une_param_1];
325; CHECK-F16:  setp.neu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
326; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
327; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
328; CHECK-NOF16: setp.neu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
329; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
330; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
331; CHECK-NEXT: ret;
; fcmp une (unordered or not equal) on two halves, returned as i1.
define i1 @test_fcmp_une(half %a, half %b) #0 {
  %cmp = fcmp une half %a, %b
  ret i1 %cmp
}
336
337; CHECK-LABEL: test_fcmp_ueq(
338; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
339; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
340; CHECK-F16:  setp.equ.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
341; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
342; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
343; CHECK-NOF16: setp.equ.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
344; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
345; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
346; CHECK-NEXT: ret;
; fcmp ueq (unordered or equal) on two halves, returned as i1.
define i1 @test_fcmp_ueq(half %a, half %b) #0 {
  %cmp = fcmp ueq half %a, %b
  ret i1 %cmp
}
351
352; CHECK-LABEL: test_fcmp_ugt(
353; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
354; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
355; CHECK-F16:  setp.gtu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
356; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
357; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
358; CHECK-NOF16: setp.gtu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
359; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
360; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
361; CHECK-NEXT: ret;
; fcmp ugt (unordered or greater than) on two halves, returned as i1.
define i1 @test_fcmp_ugt(half %a, half %b) #0 {
  %cmp = fcmp ugt half %a, %b
  ret i1 %cmp
}
366
367; CHECK-LABEL: test_fcmp_uge(
368; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
369; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
370; CHECK-F16:  setp.geu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
371; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
372; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
373; CHECK-NOF16: setp.geu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
374; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
375; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
376; CHECK-NEXT: ret;
; fcmp uge (unordered or greater or equal) on two halves, returned as i1.
define i1 @test_fcmp_uge(half %a, half %b) #0 {
  %cmp = fcmp uge half %a, %b
  ret i1 %cmp
}
381
382; CHECK-LABEL: test_fcmp_ult(
383; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
384; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
385; CHECK-F16:  setp.ltu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
386; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
387; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
388; CHECK-NOF16: setp.ltu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
389; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
390; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
391; CHECK-NEXT: ret;
; fcmp ult (unordered or less than) on two halves, returned as i1.
define i1 @test_fcmp_ult(half %a, half %b) #0 {
  %cmp = fcmp ult half %a, %b
  ret i1 %cmp
}
396
397; CHECK-LABEL: test_fcmp_ule(
398; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
399; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
400; CHECK-F16:  setp.leu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
401; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
402; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
403; CHECK-NOF16: setp.leu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
404; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
405; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
406; CHECK-NEXT: ret;
; fcmp ule (unordered or less or equal) on two halves, returned as i1.
define i1 @test_fcmp_ule(half %a, half %b) #0 {
  %cmp = fcmp ule half %a, %b
  ret i1 %cmp
}
411
412
413; CHECK-LABEL: test_fcmp_uno(
414; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
415; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
416; CHECK-F16:  setp.nan.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
417; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
418; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
419; CHECK-NOF16: setp.nan.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
420; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
421; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
422; CHECK-NEXT: ret;
; fcmp uno (either operand is NaN) on two halves, returned as i1.
define i1 @test_fcmp_uno(half %a, half %b) #0 {
  %cmp = fcmp uno half %a, %b
  ret i1 %cmp
}
427
428; CHECK-LABEL: test_fcmp_one(
429; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_one_param_0];
430; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_one_param_1];
431; CHECK-F16:  setp.ne.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
432; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
433; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
434; CHECK-NOF16: setp.ne.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
435; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
436; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
437; CHECK-NEXT: ret;
; fcmp one (ordered and not equal) on two halves, returned as i1.
define i1 @test_fcmp_one(half %a, half %b) #0 {
  %cmp = fcmp one half %a, %b
  ret i1 %cmp
}
442
443; CHECK-LABEL: test_fcmp_oeq(
444; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
445; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
446; CHECK-F16:  setp.eq.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
447; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
448; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
449; CHECK-NOF16: setp.eq.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
450; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
451; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
452; CHECK-NEXT: ret;
; fcmp oeq (ordered and equal) on two halves, returned as i1.
define i1 @test_fcmp_oeq(half %a, half %b) #0 {
  %cmp = fcmp oeq half %a, %b
  ret i1 %cmp
}
457
458; CHECK-LABEL: test_fcmp_ogt(
459; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
460; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
461; CHECK-F16:  setp.gt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
462; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
463; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
464; CHECK-NOF16: setp.gt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
465; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
466; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
467; CHECK-NEXT: ret;
; fcmp ogt (ordered and greater than) on two halves, returned as i1.
define i1 @test_fcmp_ogt(half %a, half %b) #0 {
  %cmp = fcmp ogt half %a, %b
  ret i1 %cmp
}
472
473; CHECK-LABEL: test_fcmp_oge(
474; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
475; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
476; CHECK-F16:  setp.ge.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
477; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
478; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
479; CHECK-NOF16: setp.ge.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
480; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
481; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
482; CHECK-NEXT: ret;
; fcmp oge (ordered and greater or equal) on two halves, returned as i1.
define i1 @test_fcmp_oge(half %a, half %b) #0 {
  %cmp = fcmp oge half %a, %b
  ret i1 %cmp
}
487
; fcmp olt (ordered and less than) on two halves, returned as i1.
; The label directive must use an active prefix so that the checks below are
; anchored to this function's output and cannot match a neighbouring function.
; CHECK-LABEL: test_fcmp_olt(
; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16:  setp.lt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_olt(half %a, half %b) #0 {
  %r = fcmp olt half %a, %b
  ret i1 %r
}
502
; fcmp ole (ordered and less or equal) on two halves, returned as i1.
; The label directive must use an active prefix so that the checks below are
; anchored to this function's output and cannot match a neighbouring function.
; CHECK-LABEL: test_fcmp_ole(
; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16:  setp.le.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.le.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ole(half %a, half %b) #0 {
  %r = fcmp ole half %a, %b
  ret i1 %r
}
517
518; CHECK-LABEL: test_fcmp_ord(
519; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
520; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
521; CHECK-F16:  setp.num.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
522; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
523; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
524; CHECK-NOF16: setp.num.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
525; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
526; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
527; CHECK-NEXT: ret;
; fcmp ord (neither operand is NaN) on two halves, returned as i1.
define i1 @test_fcmp_ord(half %a, half %b) #0 {
  %cmp = fcmp ord half %a, %b
  ret i1 %cmp
}
532
533; CHECK-LABEL: test_br_cc(
534; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_br_cc_param_0];
535; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_br_cc_param_1];
536; CHECK-DAG:  ld.param.u64    %[[C:rd[0-9]+]], [test_br_cc_param_2];
537; CHECK-DAG:  ld.param.u64    %[[D:rd[0-9]+]], [test_br_cc_param_3];
538; CHECK-F16:  setp.lt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
539; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
540; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
541; CHECK-NOF16: setp.lt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
542; CHECK-NEXT: @[[PRED]] bra   [[LABEL:LBB.*]];
543; CHECK:      st.u32  [%[[C]]],
544; CHECK:      [[LABEL]]:
545; CHECK:      st.u32  [%[[D]]],
546; CHECK:      ret;
; Branches on a half compare (uge): stores 0 through %p1 when it holds,
; otherwise stores 0 through %p2.
define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
  %ge = fcmp uge half %a, %b
  br i1 %ge, label %then, label %else
then:
  store i32 0, i32* %p1
  ret void
else:
  store i32 0, i32* %p2
  ret void
}
557
558; CHECK-LABEL: test_phi(
559; CHECK:      ld.param.u64    %[[P1:rd[0-9]+]], [test_phi_param_0];
560; CHECK:      ld.b16  {{%h[0-9]+}}, [%[[P1]]];
561; CHECK: [[LOOP:LBB[0-9_]+]]:
562; CHECK:      mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
563; CHECK:      ld.b16  [[AB:%h[0-9]+]], [%[[P1]]];
564; CHECK:      {
565; CHECK:      st.param.b64    [param0+0], %[[P1]];
566; CHECK:      call.uni (retval0),
567; CHECK-NEXT: test_dummy
568; CHECK:      }
569; CHECK:      setp.eq.b32     [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
570; CHECK:      @[[PRED]] bra   [[LOOP]];
571; CHECK:      st.param.b16    [func_retval0+0], [[R]];
572; CHECK:      ret;
; Loop carrying a half through a phi: each iteration reloads from %p1 and asks
; @test_dummy whether to go around again; the value returned is the phi, i.e.
; the load from the iteration before the last one (or the entry load).
define half @test_phi(half* %p1) #0 {
entry:
  %first = load half, half* %p1
  br label %loop
loop:
  %prev = phi half [%first, %entry], [%cur, %loop]
  %cur = load half, half* %p1
  %again = call i1 @test_dummy(half* %p1)
  br i1 %again, label %loop, label %return
return:
  ret half %prev
}
; External predicate that decides whether the loop above continues.
declare i1 @test_dummy(half*) #0
586
587; CHECK-LABEL: test_fptosi_i32(
588; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
589; CHECK:      cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
590; CHECK:      st.param.b32    [func_retval0+0], [[R]];
591; CHECK:      ret;
; half -> signed i32 conversion.
define i32 @test_fptosi_i32(half %a) #0 {
  %int = fptosi half %a to i32
  ret i32 %int
}
596
597; CHECK-LABEL: test_fptosi_i64(
598; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
599; CHECK:      cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
600; CHECK:      st.param.b64    [func_retval0+0], [[R]];
601; CHECK:      ret;
; half -> signed i64 conversion.
define i64 @test_fptosi_i64(half %a) #0 {
  %int = fptosi half %a to i64
  ret i64 %int
}
606
607; CHECK-LABEL: test_fptoui_i32(
608; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
609; CHECK:      cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
610; CHECK:      st.param.b32    [func_retval0+0], [[R]];
611; CHECK:      ret;
; half -> unsigned i32 conversion.
define i32 @test_fptoui_i32(half %a) #0 {
  %int = fptoui half %a to i32
  ret i32 %int
}
616
617; CHECK-LABEL: test_fptoui_i64(
618; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
619; CHECK:      cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
620; CHECK:      st.param.b64    [func_retval0+0], [[R]];
621; CHECK:      ret;
; half -> unsigned i64 conversion.
define i64 @test_fptoui_i64(half %a) #0 {
  %int = fptoui half %a to i64
  ret i64 %int
}
626
627; CHECK-LABEL: test_uitofp_i32(
628; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
629; CHECK:      cvt.rn.f16.u32  [[R:%h[0-9]+]], [[A]];
630; CHECK:      st.param.b16    [func_retval0+0], [[R]];
631; CHECK:      ret;
; unsigned i32 -> half conversion.
define half @test_uitofp_i32(i32 %a) #0 {
  %fp = uitofp i32 %a to half
  ret half %fp
}
636
637; CHECK-LABEL: test_uitofp_i64(
638; CHECK:      ld.param.u64    [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
639; CHECK:      cvt.rn.f16.u64  [[R:%h[0-9]+]], [[A]];
640; CHECK:      st.param.b16    [func_retval0+0], [[R]];
641; CHECK:      ret;
; unsigned i64 -> half conversion.
define half @test_uitofp_i64(i64 %a) #0 {
  %fp = uitofp i64 %a to half
  ret half %fp
}
646
647; CHECK-LABEL: test_sitofp_i32(
648; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
649; CHECK:      cvt.rn.f16.s32  [[R:%h[0-9]+]], [[A]];
650; CHECK:      st.param.b16    [func_retval0+0], [[R]];
651; CHECK:      ret;
; signed i32 -> half conversion.
define half @test_sitofp_i32(i32 %a) #0 {
  %fp = sitofp i32 %a to half
  ret half %fp
}
656
657; CHECK-LABEL: test_sitofp_i64(
658; CHECK:      ld.param.u64    [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
659; CHECK:      cvt.rn.f16.s64  [[R:%h[0-9]+]], [[A]];
660; CHECK:      st.param.b16    [func_retval0+0], [[R]];
661; CHECK:      ret;
; signed i64 -> half conversion.
define half @test_sitofp_i64(i64 %a) #0 {
  %fp = sitofp i64 %a to half
  ret half %fp
}
666
667; CHECK-LABEL: test_uitofp_i32_fadd(
668; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
669; CHECK-DAG:  cvt.rn.f16.u32  [[C:%h[0-9]+]], [[A]];
670; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
671; CHECK-F16:       add.rn.f16      [[R:%h[0-9]+]], [[B]], [[C]];
672; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
673; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
674; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], [[C32]];
675; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
676; CHECK:      st.param.b16    [func_retval0+0], [[R]];
677; CHECK:      ret;
; Converts the unsigned i32 argument to half, then adds it to %b.
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
  %conv = uitofp i32 %a to half
  %sum = fadd half %b, %conv
  ret half %sum
}
683
; Converts the signed i32 argument to half, then adds it to %b.
; The no-f16 expansion (extend both operands to f32, add, round back) is
; checked with an active prefix, mirroring test_uitofp_i32_fadd, so that
; [[R]] in the final store check is bound by this function in every run.
; CHECK-LABEL: test_sitofp_i32_fadd(
; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
; CHECK-DAG:  cvt.rn.f16.s32  [[C:%h[0-9]+]], [[A]];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
; CHECK-F16:         add.rn.f16     [[R:%h[0-9]+]], [[B]], [[C]];
; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
  %c = sitofp i32 %a to half
  %r = fadd half %b, %c
  ret half %r
}
700
701; CHECK-LABEL: test_fptrunc_float(
702; CHECK:      ld.param.f32    [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
703; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[A]];
704; CHECK:      st.param.b16    [func_retval0+0], [[R]];
705; CHECK:      ret;
; float -> half truncation.
define half @test_fptrunc_float(float %a) #0 {
  %narrow = fptrunc float %a to half
  ret half %narrow
}
710
711; CHECK-LABEL: test_fptrunc_double(
712; CHECK:      ld.param.f64    [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
713; CHECK:      cvt.rn.f16.f64  [[R:%h[0-9]+]], [[A]];
714; CHECK:      st.param.b16    [func_retval0+0], [[R]];
715; CHECK:      ret;
; double -> half truncation.
define half @test_fptrunc_double(double %a) #0 {
  %narrow = fptrunc double %a to half
  ret half %narrow
}
720
721; CHECK-LABEL: test_fpext_float(
722; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fpext_float_param_0];
723; CHECK:      cvt.f32.f16     [[R:%f[0-9]+]], [[A]];
724; CHECK:      st.param.f32    [func_retval0+0], [[R]];
725; CHECK:      ret;
; half -> float extension.
define float @test_fpext_float(half %a) #0 {
  %wide = fpext half %a to float
  ret float %wide
}
730
731; CHECK-LABEL: test_fpext_double(
732; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fpext_double_param_0];
733; CHECK:      cvt.f64.f16     [[R:%fd[0-9]+]], [[A]];
734; CHECK:      st.param.f64    [func_retval0+0], [[R]];
735; CHECK:      ret;
; half -> double extension.
define double @test_fpext_double(half %a) #0 {
  %wide = fpext half %a to double
  ret double %wide
}
740
741
742; CHECK-LABEL: test_bitcast_halftoi16(
743; CHECK:      ld.param.b16    [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
744; CHECK:      mov.b16         [[AS:%rs[0-9]+]], [[AH]]
745; CHECK:      cvt.u32.u16     [[R:%r[0-9]+]], [[AS]]
746; CHECK:      st.param.b32    [func_retval0+0], [[R]];
747; CHECK:      ret;
; Reinterprets the bits of a half as an i16.
define i16 @test_bitcast_halftoi16(half %a) #0 {
  %bits = bitcast half %a to i16
  ret i16 %bits
}
752
;;; bitcast i16 -> half. The parameter is loaded as u16 into an integer
;;; register and is expected to reach the f16 register through mov.b16.
; CHECK-LABEL: test_bitcast_i16tohalf(
; CHECK:      ld.param.u16    [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
; CHECK:      mov.b16         [[AH:%h[0-9]+]], [[AS]]
; CHECK:      st.param.b16    [func_retval0+0], [[AH]];
; CHECK:      ret;
define half @test_bitcast_i16tohalf(i16 %a) #0 {
  %r = bitcast i16 %a to half
  ret half %r
}


;;; Declarations of the f16 intrinsics used by the tests that follow.
;;; Among those tests, powi, pow, exp, exp2, log, log10 and log2 are
;;; referenced only by cases that are currently commented out.
declare half @llvm.sqrt.f16(half %a) #0
declare half @llvm.powi.f16(half %a, i32 %b) #0
declare half @llvm.sin.f16(half %a) #0
declare half @llvm.cos.f16(half %a) #0
declare half @llvm.pow.f16(half %a, half %b) #0
declare half @llvm.exp.f16(half %a) #0
declare half @llvm.exp2.f16(half %a) #0
declare half @llvm.log.f16(half %a) #0
declare half @llvm.log10.f16(half %a) #0
declare half @llvm.log2.f16(half %a) #0
declare half @llvm.fma.f16(half %a, half %b, half %c) #0
declare half @llvm.fabs.f16(half %a) #0
declare half @llvm.minnum.f16(half %a, half %b) #0
declare half @llvm.maxnum.f16(half %a, half %b) #0
declare half @llvm.copysign.f16(half %a, half %b) #0
declare half @llvm.floor.f16(half %a) #0
declare half @llvm.ceil.f16(half %a) #0
declare half @llvm.trunc.f16(half %a) #0
declare half @llvm.rint.f16(half %a) #0
declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0

;;; llvm.sqrt.f16. Every run configuration expects the operation to be
;;; carried out in f32: extend the input, sqrt.rn.f32, round back to f16.
; CHECK-LABEL: test_sqrt(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_sqrt_param_0];
; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
; CHECK:      sqrt.rn.f32     [[RF:%f[0-9]+]], [[AF]];
; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_sqrt(half %a) #0 {
  %r = call half @llvm.sqrt.f16(half %a)
  ret half %r
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_powi(
;define half @test_powi(half %a, i32 %b) #0 {
;  %r = call half @llvm.powi.f16(half %a, i32 %b)
;  ret half %r
;}

;;; llvm.sin.f16. Expected to be computed in f32 with sin.approx.f32 and
;;; rounded back to f16. This function also carries attribute group #1;
;;; presumably that attribute is what permits the approximate instruction
;;; (TODO confirm).
; CHECK-LABEL: test_sin(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_sin_param_0];
; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
; CHECK:      sin.approx.f32  [[RF:%f[0-9]+]], [[AF]];
; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_sin(half %a) #0 #1 {
  %r = call half @llvm.sin.f16(half %a)
  ret half %r
}

;;; llvm.cos.f16. Expected to be computed in f32 with cos.approx.f32 and
;;; rounded back to f16. This function also carries attribute group #1;
;;; presumably that attribute is what permits the approximate instruction
;;; (TODO confirm).
; CHECK-LABEL: test_cos(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_cos_param_0];
; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
; CHECK:      cos.approx.f32  [[RF:%f[0-9]+]], [[AF]];
; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_cos(half %a) #0 #1 {
  %r = call half @llvm.cos.f16(half %a)
  ret half %r
}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_pow(
;define half @test_pow(half %a, half %b) #0 {
;  %r = call half @llvm.pow.f16(half %a, half %b)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp(
;define half @test_exp(half %a) #0 {
;  %r = call half @llvm.exp.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_exp2(
;define half @test_exp2(half %a) #0 {
;  %r = call half @llvm.exp2.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log(
;define half @test_log(half %a) #0 {
;  %r = call half @llvm.log.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log10(
;define half @test_log10(half %a) #0 {
;  %r = call half @llvm.log10.f16(half %a)
;  ret half %r
;}

;;; Can't do this yet: requires libcall.
; XCHECK-LABEL: test_log2(
;define half @test_log2(half %a) #0 {
;  %r = call half @llvm.log2.f16(half %a)
;  ret half %r
;}

;;; llvm.fma.f16. With f16 math enabled a native fma.rn.f16 is expected.
;;; In the no-f16 configurations all three operands are extended to f32,
;;; combined with fma.rn.f32, and the result is rounded back to f16.
; CHECK-LABEL: test_fma(
; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fma_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fma_param_1];
; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_fma_param_2];
; CHECK-F16:      fma.rn.f16      [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_fma(half %a, half %b, half %c) #0 {
  %r = call half @llvm.fma.f16(half %a, half %b, half %c)
  ret half %r
}

;;; llvm.fabs.f16. Every run configuration expects the operation to be
;;; carried out in f32: extend the input, abs.f32, round back to f16.
; CHECK-LABEL: test_fabs(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fabs_param_0];
; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
; CHECK:      abs.f32         [[RF:%f[0-9]+]], [[AF]];
; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_fabs(half %a) #0 {
  %r = call half @llvm.fabs.f16(half %a)
  ret half %r
}

;;; llvm.minnum.f16. Every run configuration expects both operands to be
;;; extended to f32, combined with min.f32, and rounded back to f16.
; CHECK-LABEL: test_minnum(
; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_minnum_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_minnum_param_1];
; CHECK-DAG:  cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
; CHECK-DAG:  cvt.f32.f16     [[BF:%f[0-9]+]], [[B]];
; CHECK:      min.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_minnum(half %a, half %b) #0 {
  %r = call half @llvm.minnum.f16(half %a, half %b)
  ret half %r
}

;;; llvm.maxnum.f16. Every run configuration expects both operands to be
;;; extended to f32, combined with max.f32, and rounded back to f16.
; CHECK-LABEL: test_maxnum(
; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_maxnum_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_maxnum_param_1];
; CHECK-DAG:  cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
; CHECK-DAG:  cvt.f32.f16     [[BF:%f[0-9]+]], [[B]];
; CHECK:      max.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_maxnum(half %a, half %b) #0 {
  %r = call half @llvm.maxnum.f16(half %a, half %b)
  ret half %r
}

;;; llvm.copysign.f16 with two half operands. The expected lowering is
;;; pure 16-bit integer bit manipulation: mask %a with 32767 (0x7FFF,
;;; everything but the top bit), mask %b with -32768 (0x8000, the top bit
;;; only), OR the two and move the result back into an f16 register.
; CHECK-LABEL: test_copysign(
; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_param_0];
; CHECK-DAG:  ld.param.b16    [[BH:%h[0-9]+]], [test_copysign_param_1];
; CHECK-DAG:  mov.b16         [[AS:%rs[0-9]+]], [[AH]];
; CHECK-DAG:  mov.b16         [[BS:%rs[0-9]+]], [[BH]];
; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AS]], 32767;
; CHECK-DAG:  and.b16         [[BX:%rs[0-9]+]], [[BS]], -32768;
; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX]];
; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_copysign(half %a, half %b) #0 {
  %r = call half @llvm.copysign.f16(half %a, half %b)
  ret half %r
}

;;; copysign where the sign operand is a float truncated to half. The
;;; expected code takes the top bit straight from the 32-bit pattern of %b
;;; (mask -2147483648 = 0x80000000), shifts it right by 16 so it lands in
;;; bit 15, narrows to 16 bits and ORs it into the masked bits of %a.
;;; None of the checks looks for a conversion of %b to f16.
; CHECK-LABEL: test_copysign_f32(
; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
; CHECK-DAG:  ld.param.f32    [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
; CHECK-DAG:  mov.b16         [[A:%rs[0-9]+]], [[AH]];
; CHECK-DAG:  mov.b32         [[B:%r[0-9]+]], [[BF]];
; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[A]], 32767;
; CHECK-DAG:  and.b32         [[BX0:%r[0-9]+]], [[B]], -2147483648;
; CHECK-DAG:  shr.u32         [[BX1:%r[0-9]+]], [[BX0]], 16;
; CHECK-DAG:  cvt.u16.u32     [[BX2:%rs[0-9]+]], [[BX1]];
; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_copysign_f32(half %a, float %b) #0 {
  %tb = fptrunc float %b to half
  %r = call half @llvm.copysign.f16(half %a, half %tb)
  ret half %r
}

;;; copysign where the sign operand is a double truncated to half. The
;;; expected code takes the top bit straight from the 64-bit pattern of %b
;;; (mask -9223372036854775808 = 0x8000000000000000), shifts it right by
;;; 48 so it lands in bit 15, narrows to 16 bits and ORs it into the
;;; masked bits of %a. None of the checks looks for a conversion of %b
;;; to f16.
; CHECK-LABEL: test_copysign_f64(
; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
; CHECK-DAG:  ld.param.f64    [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
; CHECK-DAG:  mov.b16         [[A:%rs[0-9]+]], [[AH]];
; CHECK-DAG:  mov.b64         [[B:%rd[0-9]+]], [[BD]];
; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[A]], 32767;
; CHECK-DAG:  and.b64         [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
; CHECK-DAG:  shr.u64         [[BX1:%rd[0-9]+]], [[BX0]], 48;
; CHECK-DAG:  cvt.u16.u64     [[BX2:%rs[0-9]+]], [[BX1]];
; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_copysign_f64(half %a, double %b) #0 {
  %tb = fptrunc double %b to half
  %r = call half @llvm.copysign.f16(half %a, half %tb)
  ret half %r
}

;;; copysign on two halves whose result is extended to float. The
;;; sign transfer is still expected to happen on 16-bit integer registers;
;;; only the finished f16 value is widened with cvt.f32.f16.
; CHECK-LABEL: test_copysign_extended(
; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
; CHECK-DAG:  ld.param.b16    [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
; CHECK-DAG:  mov.b16         [[AS:%rs[0-9]+]], [[AH]];
; CHECK-DAG:  mov.b16         [[BS:%rs[0-9]+]], [[BH]];
; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AS]], 32767;
; CHECK-DAG:  and.b16         [[BX:%rs[0-9]+]], [[BS]], -32768;
; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX]];
; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
; CHECK:      cvt.f32.f16     [[XR:%f[0-9]+]], [[R]];
; CHECK:      st.param.f32    [func_retval0+0], [[XR]];
; CHECK:      ret;
define float @test_copysign_extended(half %a, half %b) #0 {
  %r = call half @llvm.copysign.f16(half %a, half %b)
  %xr = fpext half %r to float
  ret float %xr
}

;;; llvm.floor.f16. Every run configuration expects an f16-to-f16 cvt
;;; with the .rmi rounding modifier.
; CHECK-LABEL: test_floor(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_floor_param_0];
; CHECK:      cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_floor(half %a) #0 {
  %r = call half @llvm.floor.f16(half %a)
  ret half %r
}

;;; llvm.ceil.f16. Every run configuration expects an f16-to-f16 cvt
;;; with the .rpi rounding modifier.
; CHECK-LABEL: test_ceil(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_ceil_param_0];
; CHECK:      cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_ceil(half %a) #0 {
  %r = call half @llvm.ceil.f16(half %a)
  ret half %r
}

;;; llvm.trunc.f16. Every run configuration expects an f16-to-f16 cvt
;;; with the .rzi rounding modifier.
; CHECK-LABEL: test_trunc(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_trunc_param_0];
; CHECK:      cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_trunc(half %a) #0 {
  %r = call half @llvm.trunc.f16(half %a)
  ret half %r
}

;;; llvm.rint.f16. Every run configuration expects an f16-to-f16 cvt
;;; with the .rni rounding modifier.
; CHECK-LABEL: test_rint(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_rint_param_0];
; CHECK:      cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_rint(half %a) #0 {
  %r = call half @llvm.rint.f16(half %a)
  ret half %r
}

;;; llvm.nearbyint.f16. Every run configuration expects the same
;;; cvt.rni.f16.f16 that the rint test expects.
; CHECK-LABEL: test_nearbyint(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_nearbyint_param_0];
; CHECK:      cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_nearbyint(half %a) #0 {
  %r = call half @llvm.nearbyint.f16(half %a)
  ret half %r
}

;;; llvm.round.f16. Every run configuration expects cvt.rni.f16.f16, the
;;; same instruction the rint and nearbyint tests expect.
;;; NOTE(review): llvm.round is documented as rounding halfway cases away
;;; from zero, whereas the PTX .rni modifier is documented as picking the
;;; even integer on ties. This expectation may therefore be pinning a
;;; lowering that differs on .5 inputs -- confirm against the backend
;;; before relying on it.
; CHECK-LABEL: test_round(
; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_round_param_0];
; CHECK:      cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_round(half %a) #0 {
  %r = call half @llvm.round.f16(half %a)
  ret half %r
}

;;; llvm.fmuladd.f16. The expectations mirror the fma test: a native
;;; fma.rn.f16 when f16 math is enabled, otherwise extend all three
;;; operands to f32, use fma.rn.f32 and round the result back to f16.
; CHECK-LABEL: test_fmuladd(
; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fmuladd_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fmuladd_param_1];
; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_fmuladd_param_2];
; CHECK-F16:        fma.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
; CHECK-NOF16-NEXT: fma.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_fmuladd(half %a, half %b, half %c) #0 {
  %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
  ret half %r
}

;;; Attribute groups. In this part of the file #0 is attached to every
;;; test function and intrinsic declaration; #1 is added only to the sin
;;; and cos tests.
attributes #0 = { nounwind }
attributes #1 = { "unsafe-fp-math" = "true" }