; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s

; Both conversion operands are scalar kernel arguments.
; The checks expect the two floats to arrive through a single s_load_dwordx2,
; the second one to be copied into a VGPR, and the first one to be consumed
; directly from its SGPR. The load offset is matched as either 0xb or 0x2c
; (presumably to cover the different targets exercised by this file).
; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, s[[SX]], [[VY]]
define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
  %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
  store <2 x half> %result, <2 x half> addrspace(1)* %out
  ret void
}

; The same scalar kernel argument feeds both conversion operands.
; The checks expect one s_load_dword and that single SGPR to appear as both
; sources of the conversion; the _e64 suffix is matched as optional.
; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 {
  %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x)
  store <2 x half> %result, <2 x half> addrspace(1)* %out
  ret void
}

; Both conversion operands are undef.
; The checks expect the kernel body to consist of nothing but s_endpgm
; immediately after the %bb.0 marker, i.e. no conversion and no store.
; GCN-LABEL: {{^}}s_cvt_pkrtz_undef_undef:
; GCN-NEXT: ; %bb.0
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 {
  %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
  store <2 x half> %result, <2 x half> addrspace(1)* %out
  ret void
}

; Both conversion operands are loaded from memory at a per-workitem index.
; The checks expect two dword loads into VGPRs (buffer, flat or global form)
; and those two registers, in order, as the sources of the conversion.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  ; Index every pointer by the workitem id so the operands live in VGPRs.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  ; The loads are volatile; the checks above rely on seeing A before B.
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; First operand is a loaded VGPR value, second operand is the constant 1.0.
; The checks expect the constant to appear directly as the second source
; operand (printed as 1.0); the _e64 suffix is matched as optional.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  ; Index the pointers by the workitem id so the loaded operand is a VGPR.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float 1.0)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; First operand is the constant 1.0, second operand is a loaded VGPR value.
; The checks expect the constant to appear directly as the first source
; operand (printed as 1.0) with the loaded register as the second source.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]]
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  ; Index the pointers by the workitem id so the loaded operand is a VGPR.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 1.0, float %a)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The first conversion operand is negated; the second is used unchanged.
; The checks expect the negation to show up as a "-" prefix on the first
; source register of the conversion rather than as a separate instruction
; operand; the _e64 suffix is matched as optional.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  ; Index every pointer by the workitem id so the operands live in VGPRs.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  ; Negation is spelled as a subtraction from -0.0.
  %neg.a = fsub float -0.0, %a
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %b)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The second conversion operand is negated; the first is used unchanged.
; The checks expect the negation to show up as a "-" prefix on the second
; source register of the conversion; the _e64 suffix is matched as optional.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  ; Index every pointer by the workitem id so the operands live in VGPRs.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  ; Negation is spelled as a subtraction from -0.0.
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %neg.b)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; Both conversion operands are negated.
; The checks expect a "-" prefix on each source register of the conversion;
; the _e64 suffix is matched as optional.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  ; Index every pointer by the workitem id so the operands live in VGPRs.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  ; Each negation is spelled as a subtraction from -0.0.
  %neg.a = fsub float -0.0, %a
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %neg.b)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The first operand is the negated absolute value of A; the second operand is
; the negation of B. The checks expect both to be expressed on the conversion's
; source operands as -|A| and -B; the _e64 suffix is matched as optional.
; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  ; Index every pointer by the workitem id so the operands live in VGPRs.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  ; Absolute value first, then negation, both spelled out explicitly in IR.
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %neg.fabs.a = fsub float -0.0, %fabs.a
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.fabs.a, float %neg.b)
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  ret void
}

; Intrinsics called by the kernels in this file.
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare float @llvm.fabs.f32(float) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to every kernel definition, #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
163