; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)

; GCN-LABEL: {{^}}div_fixup_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}div_fixup_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}}
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
entry:
  %b.val = load volatile half, half addrspace(1)* %b
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}div_fixup_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}}
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}div_fixup_f16_imm_c
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}}
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half 3.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}div_fixup_f16_imm_a_imm_b
; VI-DAG: v_mov_b32_e32 v[[AB_F16:[0-9]+]], 0x4200{{$}}
; GCN-DAG: buffer_load_ushort v[[C_F16:[0-9]+]]
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[AB_F16]], v[[AB_F16]], v[[C_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %c) {
entry:
  %c.val = load volatile half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half 3.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}div_fixup_f16_imm_b_imm_c
; VI-DAG: v_mov_b32_e32 v[[BC_F16:[0-9]+]], 0x4200{{$}}
; GCN-DAG: buffer_load_ushort v[[A_F16:[0-9]+]]
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[BC_F16]], v[[BC_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_b_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half 3.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}div_fixup_f16_imm_a_imm_c
; VI-DAG: v_mov_b32_e32 v[[AC_F16:[0-9]+]], 0x4200{{$}}
; GCN-DAG: buffer_load_ushort v[[B_F16:[0-9]+]]
; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[AC_F16]], v[[B_F16]], v[[AC_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %b) {
entry:
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half 3.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}