; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Codegen test for the llvm.amdgcn.atomic.dec intrinsics on three pointer
; kinds: addrspace(3) ("lds_*" tests), addrspace(1) ("global_*" tests) and the
; default address space ("flat_*" tests), for both i32 and i64 operands.
;
; Each intrinsic takes (pointer, value, i32, i32, i1). The first three tests
; pass a non-constant value for each of the trailing operands in turn; the
; remaining tests pass the constants 0, 0, false.
;
; Naming used below:
;   *_ret_*    the intrinsic result is stored, so a returning form is expected
;   *_noret_*  the intrinsic result is unused
;   *_offset   the pointer is a constant GEP off the kernel argument
;   *_addr64   the pointer is additionally indexed by the workitem id
;
; For the addrspace(3) tests, the bonaire/tonga runs expect an s_mov_b32 to m0
; while the gfx900 run expects no reference to m0 at all.

declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2

declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2

declare i32 @llvm.amdgcn.workitem.id.x() #1

; ---------------------------------------------------------------------------
; Non-constant trailing operands: only checks that compilation does not crash.
; ---------------------------------------------------------------------------

; Make sure there is no crash when the third operand (%order.var) is not a
; constant.
; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Make sure there is no crash when the fourth operand (%scope.var) is not a
; constant.
; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; Make sure there is no crash when the fifth operand (%volatile.var) is not a
; constant.
; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(3), i32
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The GEP of 4 x i32 is expected to fold into the instruction as offset:16.
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; The data operand must be the constant 42 passed to the intrinsic. The check
; previously matched a bare "4", which only passed as a prefix of "42".
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(1), i32
; ---------------------------------------------------------------------------

; The returning form is expected to carry the glc bit.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The non-returning form is expected to have no glc bit (line ends after the
; last operand).
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]], off{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GFX9: global_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]], off offset:16{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; Pointer indexed by the workitem id plus a constant 5 x i32 (20 bytes).
; NOTE(review): this function has no gfx900-specific instruction check.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32 addrspace(1)* %out.gep
  ret void
}

; NOTE(review): this function has no gfx900-specific instruction check.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; Default address space (flat), i32
;
; In the *_offset tests, the bonaire/tonga checks expect no offset field on the
; flat instruction, while the gfx900 checks expect the constant offset folded.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind {
  %gep = getelementptr i32, i32* %ptr, i32 4
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %out.gep = getelementptr i32, i32* %out, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  store i32 %result, i32* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
  %gep = getelementptr i32, i32* %gep.tid, i32 5
  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; Default address space (flat), i64
;
; The i64 constant 42 is checked as a low/high register pair holding 42 and 0.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
  %gep = getelementptr i64, i64* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40 glc{{$}}
define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %out.gep = getelementptr i64, i64* %out, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64* %out.gep
  ret void
}

; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:40{{$}}
define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
  %gep = getelementptr i64, i64* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(3) global array indexed by (workitem id + 2), i32
; ---------------------------------------------------------------------------

@lds0 = addrspace(3) global [512 x i32] undef

; The index (tid + 2) has a second use (stored to %add_use). The checks expect
; the shift by 2 applied to a register and the constant part emitted as
; offset:8 on the DS instruction.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(3), i64
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(1), i64
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off offset:32{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; Pointer indexed by the workitem id plus a constant 5 x i64 (40 bytes).
; NOTE(review): this function has no gfx900-specific instruction check.
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  store i64 %result, i64 addrspace(1)* %out.gep
  ret void
}

; NOTE(review): this function has no gfx900-specific instruction check.
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
  ret void
}

; ---------------------------------------------------------------------------
; addrspace(3) global array indexed by (workitem id + 2), i64
; ---------------------------------------------------------------------------

@lds1 = addrspace(3) global [512 x i64] undef, align 8

; Same shape as atomic_dec_shl_base_lds_0 with 8-byte elements: the checks
; expect a shift by 3 and the constant part emitted as offset:16.
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
; CIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i64 %val0, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind argmemonly }