; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Intrinsic under test: half-precision division fixup (VI+ only).
declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)

; All three operands loaded from memory; expect a single v_div_fixup_f16 on VI.
; GCN-LABEL: {{^}}div_fixup_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  ; volatile loads keep the three buffer_load_ushort instructions distinct
  ; and in order for the CHECK lines above
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate first operand (3.0 = 0x4200 in f16) must be materialized in a VGPR.
; GCN-LABEL: {{^}}div_fixup_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI:  v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}}
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate second operand (3.0 = 0x4200 in f16) must be materialized in a VGPR.
; GCN-LABEL: {{^}}div_fixup_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI:  v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}}
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Immediate third operand (3.0 = 0x4200 in f16) must be materialized in a VGPR.
; GCN-LABEL: {{^}}div_fixup_f16_imm_c
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; VI:  v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}}
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half 3.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Identical immediates in operands a and b should share one materialized VGPR.
; GCN-LABEL: {{^}}div_fixup_f16_imm_a_imm_b
; VI-DAG:  v_mov_b32_e32 v[[AB_F16:[0-9]+]], 0x4200{{$}}
; GCN-DAG: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[AB_F16]], v[[AB_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %c) {
entry:
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half 3.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Identical immediates in operands b and c should share one materialized VGPR.
; GCN-LABEL: {{^}}div_fixup_f16_imm_b_imm_c
; VI-DAG:  v_mov_b32_e32 v[[BC_F16:[0-9]+]], 0x4200{{$}}
; GCN-DAG: buffer_load_ushort v[[A_F16:[0-9]+]]
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[BC_F16]], v[[BC_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_b_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  ; NOTE(review): non-volatile load here, unlike the earlier tests — confirm intentional
  %a.val = load half, half addrspace(1)* %a
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half 3.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Identical immediates in operands a and c should share one materialized VGPR.
; GCN-LABEL: {{^}}div_fixup_f16_imm_a_imm_c
; VI-DAG:  v_mov_b32_e32 v[[AC_F16:[0-9]+]], 0x4200{{$}}
; GCN-DAG: buffer_load_ushort v[[B_F16:[0-9]+]]
; VI:  v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[AC_F16]], v[[B_F16]], v[[AC_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %b) {
entry:
  ; NOTE(review): non-volatile load here, unlike the earlier tests — confirm intentional
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half 3.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}
