; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s

; Instruction selection tests for the llvm.amdgcn.atomic.inc intrinsics.
;
; Each intrinsic flavour (i32 / i64) is exercised on three pointer kinds:
;   * addrspace(3) pointers  - the lds_* tests, expected to select ds_inc*
;   * addrspace(1) pointers  - the global_* tests, expected to select
;                              buffer_atomic_inc* or global_atomic_inc*
;                              (flat_atomic_inc* for the tonga addr64 cases)
;   * plain (flat) pointers  - the flat_* tests, expected to select flat_atomic_inc*
;
; The "ret" variants store the intrinsic result and expect the returning
; instruction form; the "noret" variants leave the result unused and expect the
; non-returning form. The "offset" variants add a constant getelementptr in
; front of the atomic and check whether it is folded into the instruction.
;
; The bonaire, tonga and gfx900 runs check the final assembly; the fourth run
; stops before the machine scheduler and checks the memory operand printed on
; the MIR instruction of the first test.

declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2

declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1

; ---------------------------------------------------------------------------
; i32 atomics on addrspace(3) pointers.
; ---------------------------------------------------------------------------

; The call below carries !noalias metadata; the MIR run checks that it shows up
; on the memory operand of the selected instruction.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; MIR-LABEL: @lds_atomic_inc_ret_i32
; MIR: DS_INC_RTN_U32 {{.*}} :: (load store 4 on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3)
define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

!0 = distinct !{!0, !"noalias-scope"}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The data operand of the call is the constant 42, so the move that
; materializes it is matched in full instead of by a leading-digit substring.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; i32 atomics on addrspace(1) pointers.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; The addr64 variants index the pointer by the workitem id before applying the
; constant offset, so the address is no longer uniform.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

@lds0 = addrspace(3) global [512 x i32] undef, align 4

; The index (tid + 2) has a second use through %add_use; the checks expect the
; constant part to end up as offset:8 on the ds instruction.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
; CIVI-DAG: v_lshlrev_b32_e32 [[OFS:v[0-9]+]], 2, {{v[0-9]+}}
; CIVI-DAG: v_add_{{[ui]}}32_e32 [[PTR:v[0-9]+]], vcc, lds0@abs32@lo, [[OFS]]
; GFX9-DAG: s_mov_b32 [[BASE:s[0-9]+]], lds0@abs32@lo
; GFX9-DAG: v_lshl_add_u32 [[PTR:v[0-9]+]], {{v[0-9]+}}, 2, [[BASE]]
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; i64 atomics on addrspace(3) pointers.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; i64 atomics on addrspace(1) pointers.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}

; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GFX9-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_inc_x2 v[[ZERO]], v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; i32 atomics on flat pointers. Only the gfx900 checks expect the constant
; offset to appear on the instruction.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %out.gep = getelementptr i32, i32* %out, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

@lds1 = addrspace(3) global [512 x i64] undef, align 8

; i64 counterpart of atomic_inc_shl_base_lds_0_i32; the checks expect a shift
; by 3 and offset:16 on the ds instruction.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
; CIVI-DAG: v_lshlrev_b32_e32 [[OFS:v[0-9]+]], 3, {{v[0-9]+}}
; CIVI-DAG: v_add_{{[ui]}}32_e32 [[PTR:v[0-9]+]], vcc, lds1@abs32@lo, [[OFS]]
; GFX9-DAG: v_mov_b32_e32 [[BASE:v[0-9]+]], lds1@abs32@lo
; GFX9-DAG: v_lshl_add_u32 [[PTR:v[0-9]+]], {{v[0-9]+}}, 3, [[BASE]]
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; i64 atomics on flat pointers.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %out.gep = getelementptr i64, i64* %out, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Two identical atomic calls on the same pointer must both be emitted; the
; checks require two ds_inc_rtn_u32 instructions.
; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 {
  %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)

  store i32 %result0, i32 addrspace(1)* %out0
  store i32 %result1, i32 addrspace(1)* %out1
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }