; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Checks instruction selection and addressing-mode folding for i32 atomicrmw
; operations (add/and/sub/max/umax/min/umin) on global memory, with and
; without a returned value, immediate offsets, and a 64-bit index.
; SI selects buffer_atomic_* (addr64/offset forms), VI selects flat_atomic_*,
; and GFX9 selects global_atomic_* with folded immediate offsets.

; GCN-LABEL: {{^}}atomic_add_i32_offset:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}}
define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_soffset:
; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}

; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}}
; GFX9: global_atomic_add [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:3232{{$}}
define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}

; VI: flat_atomic_add

; GFX9: v_mov_b32_e32 [[HIGH_K:v[0-9]+]], 0xabcd
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xd000,
; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, [[HIGH_K]], v{{[0-9]+}}, vcc
; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3756{{$}}
define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595

  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_offset:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_offset:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_offset:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32:
; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_offset:
; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32:
; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret:
; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
entry:
  %gep = getelementptr float, float addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}

; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
  %extract0 = extractvalue { i32, i1 } %val, 0
  store i32 %extract0, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32:
; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_negoffset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_negoffset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 -128
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
  %gep = getelementptr float, float addrspace(1)* %in, i64 4
  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
  store float %val, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc
define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %in, i64 %index
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
  store float %val, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_offset:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) {
entry:
  store atomic float %in, float addrspace(1)* %out seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
  store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
  store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
  ret void
}