; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; GCN-LABEL: {{^}}sint_to_fp_i32_to_f64
; GCN: v_cvt_f64_i32_e32
; sitofp of an i32 kernel argument to f64 should select the single
; hardware conversion instruction (v_cvt_f64_i32).
define amdgpu_kernel void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
  %result = sitofp i32 %in to double
  store double %result, double addrspace(1)* %out
  ret void
}

; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR (implicit vcc).

; GCN-LABEL: {{^}}sint_to_fp_i1_f64:
; VI-DAG: s_cmp_eq_u32
; VI-DAG: s_cselect_b32 s[[SSEL:[0-9]+]], 0xbff00000, 0
; VI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; VI-DAG: v_mov_b32_e32 v[[SEL:[0-9]+]], s[[SSEL]]
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
; VI: s_endpgm

; SI-DAG: v_cmp_eq_u32_e64 vcc,
; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}}
; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[ZERO]]:[[SEL]]{{\]}}
; SI: s_endpgm
; sitofp of a compare result: signed i1 true is -1, so the lowering is a
; select between 0.0 and -1.0 rather than an integer conversion.
define amdgpu_kernel void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
  %cmp = icmp eq i32 %in, 0
  %fp = sitofp i1 %cmp to double
  store double %fp, double addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}sint_to_fp_i1_f64_load:
; GCN: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, -1
; GCN: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: s_endpgm
; sitofp of an i1 kernel argument: sign-extend to -1/0 (cndmask), then a
; normal i32 -> f64 conversion.
define amdgpu_kernel void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
  %fp = sitofp i1 %in to double
  store double %fp, double addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: @s_sint_to_fp_i64_to_f64
; i64 -> f64 with a uniform (SGPR) source.
define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
  %result = sitofp i64 %in to double
  store double %result, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: @v_sint_to_fp_i64_to_f64
; GCN: flat_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; GCN-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; GCN-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; i64 -> f64 with a divergent (per-lane loaded) source: converted as
; signed high half scaled by 2^32 (ldexp) plus unsigned low half.
define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %val = load i64, i64 addrspace(1)* %gep, align 8
  %result = sitofp i64 %val to double
  store double %result, double addrspace(1)* %out
  ret void
}
70
71; FIXME: bfe and sext on VI+
72; GCN-LABEL: {{^}}s_sint_to_fp_i8_to_f64:
73; GCN: s_load_dword [[VAL:s[0-9]+]]
74; SI-NOT: bfe
75; SI: s_sext_i32_i8 [[SEXT:s[0-9]+]], [[VAL]]
76
77; VI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x80000
78; VI: s_sext_i32_i16 [[SEXT:s[0-9]+]], [[BFE]]
79
80; GCN: v_cvt_f64_i32_e32 v{{\[[0-9]+:[0-9]+\]}}, [[SEXT]]
; i8 -> f64 with a uniform source: scalar sign-extend then convert.
define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(double addrspace(1)* %out, i8 %in) {
  %fp = sitofp i8 %in to double
  store double %fp, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_sint_to_fp_i8_to_f64:
; GCN: v_bfe_i32 [[SEXT:v[0-9]+]]
; GCN: v_cvt_f64_i32_e32 v{{\[[0-9]+:[0-9]+\]}}, [[SEXT]]
; i8 -> f64 with a divergent source (non-kernel function argument arrives
; in a VGPR): VALU sign-extend (bfe) then convert.
define double @v_sint_to_fp_i8_to_f64(i8 %in) {
  %fp = sitofp i8 %in to double
  ret double %fp
}