; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Codegen tests for folding constant operands of and/or/xor/not.
;
; Naming used by the tests below:
;   *_v_* : the non-constant operand comes from llvm.amdgcn.mbcnt.lo or
;           llvm.ctpop.i64, and the checks expect it in a vector register.
;   *_s_* : the non-constant operand is a kernel argument; where the checks
;           mention it, it is loaded with s_load_dword.
; The "0" operand is the result of llvm.amdgcn.groupstaticsize(); the expected
; output below treats that value as the constant 0 in these kernels.

; and with 0: only a move of the literal 0 should feed the store.
; GCN-LABEL: {{^}}fold_mi_v_and_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %and = and i32 %size, %x
  store i32 %and, i32 addrspace(1)* %out
  ret void
}

; Same as above, with the other operand taken from a kernel argument.
; GCN-LABEL: {{^}}fold_mi_s_and_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %and = and i32 %size, %x
  store i32 %and, i32 addrspace(1)* %out
  ret void
}

; or with 0: the mbcnt result should be stored directly, with no other use of
; its register in between.
; GCN-LABEL: {{^}}fold_mi_v_or_0:
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %or = or i32 %size, %x
  store i32 %or, i32 addrspace(1)* %out
  ret void
}

; or with 0 on a kernel argument: the loaded scalar is only copied to a vector
; register and stored.
; GCN-LABEL: {{^}}fold_mi_s_or_0:
; GCN: s_load_dword [[SVAL:s[0-9]+]]
; GCN-NOT: [[SVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %or = or i32 %size, %x
  store i32 %or, i32 addrspace(1)* %out
  ret void
}

; xor with 0: same expectation as the or-with-0 vector case.
; GCN-LABEL: {{^}}fold_mi_v_xor_0:
; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, %x
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}

; xor with 0 on a kernel argument: same expectation as the or-with-0 scalar
; case.
; GCN-LABEL: {{^}}fold_mi_s_xor_0:
; GCN: s_load_dword [[SVAL:s[0-9]+]]
; GCN-NOT: [[SVAL]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, %x
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}

; not of 0 (written as xor with -1): a move of the literal -1 is stored.
; The %x argument is intentionally unused.
; GCN-LABEL: {{^}}fold_mi_s_not_0:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
  %size = call i32 @llvm.amdgcn.groupstaticsize()
  %xor = xor i32 %size, -1
  store i32 %xor, i32 addrspace(1)* %out
  ret void
}

; 64-bit not of a ctpop result: the low half is a real v_not_b32 of the
; two-instruction bit count, and the high half is expected to be just the
; literal -1.
; GCN-LABEL: {{^}}fold_mi_v_not_0:
; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
  %vreg = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
  %xor = xor i64 %ctpop, -1
  store i64 %xor, i64 addrspace(1)* %out
  ret void
}

; The neg1 appears after folding the not 0.
; The checks expect a single v_not/v_or pair on the low half, while the high
; half of the stored value is a plain copy of a loaded register.
; GCN-LABEL: {{^}}fold_mi_or_neg1:
; GCN: buffer_load_dwordx2
; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}

; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]], v[[VREG1_LO]]
; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
  %vreg0 = load volatile i64, i64 addrspace(1)* undef
  %vreg1 = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
  %xor = xor i64 %ctpop, -1
  %or = or i64 %xor, %vreg1
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; and of the 64-bit not with a loaded value: exactly one v_and_b32 is
; expected after the bit count and the v_not.
; GCN-LABEL: {{^}}fold_mi_and_neg1:
; GCN: v_bcnt_u32_b32
; GCN: v_bcnt_u32_b32
; GCN: v_not_b32
; GCN: v_and_b32
; GCN-NOT: v_and_b32
define amdgpu_kernel void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
  %vreg0 = load volatile i64, i64 addrspace(1)* undef
  %vreg1 = load volatile i64, i64 addrspace(1)* undef
  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
  %xor = xor i64 %ctpop, -1
  %and = and i64 %xor, %vreg1
  store i64 %and, i64 addrspace(1)* %out
  ret void
}

declare i64 @llvm.ctpop.i64(i64) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.groupstaticsize() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }