1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s 2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s 3; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s 4 5; GCN-LABEL: {{^}}atomic_add_i32_offset: 6; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 7; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 8define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { 9entry: 10 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 11 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 12 ret void 13} 14 15; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset: 16; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:-4096{{$}} 17define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) { 18entry: 19 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024 20 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 21 ret void 22} 23 24; GCN-LABEL: {{^}}atomic_add_i32_soffset: 25; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 26; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} 27 28; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} 29define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) { 30entry: 31 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000 32 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 33 ret void 34} 35 36; GCN-LABEL: {{^}}atomic_add_i32_huge_offset: 37; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac 38; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd 39; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 40 41; VI: flat_atomic_add 42 43; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} 44define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) { 45entry: 46 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595 47 48 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 49 ret void 50} 51 52; GCN-LABEL: {{^}}atomic_add_i32_ret_offset: 53; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 54; SIVI: buffer_store_dword [[RET]] 55 56; GFX9: global_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 57define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 58entry: 59 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 60 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 61 store i32 %val, i32 addrspace(1)* %out2 62 ret void 63} 64 65; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset: 66; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 67; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 68; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 69define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 70entry: 71 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 72 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 73 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 74 ret void 75} 76 77; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: 78; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 79; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 80; SIVI: buffer_store_dword [[RET]] 81 82; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 83; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 84define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 85entry: 86 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 87 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 88 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst 89 store i32 %val, i32 addrspace(1)* %out2 90 ret void 91} 92 93; GCN-LABEL: {{^}}atomic_add_i32: 94; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 95; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} 96define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { 97entry: 98 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst 99 ret void 100} 101 102; GCN-LABEL: {{^}}atomic_add_i32_ret: 103; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 104; SIVI: buffer_store_dword [[RET]] 105 106; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 107; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 108define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 109entry: 110 %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst 111 store i32 %val, i32 addrspace(1)* %out2 112 ret void 113} 114 115; GCN-LABEL: {{^}}atomic_add_i32_addr64: 116; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 117; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 118; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 119define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 120entry: 121 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 122 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst 123 ret void 124} 125 126; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64: 127; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 128; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 129; SIVI: buffer_store_dword [[RET]] 130 131; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 132define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 133entry: 134 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 135 %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst 136 store i32 %val, i32 addrspace(1)* %out2 137 ret void 138} 139 140; GCN-LABEL: {{^}}atomic_and_i32_offset: 141; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 142 143; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 144define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { 145entry: 146 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 147 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 148 ret void 149} 150 151; GCN-LABEL: {{^}}atomic_and_i32_ret_offset: 152; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 153; SIVI: buffer_store_dword [[RET]] 154 155; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 156define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 157entry: 158 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 159 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 160 store i32 %val, i32 addrspace(1)* %out2 161 ret void 162} 163 164; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset: 165; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 166; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 167 168; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 169define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 170entry: 171 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 172 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 173 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 174 ret void 175} 176 177; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: 178; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 179; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 180; SIVI: buffer_store_dword [[RET]] 181 182; GFX9: global_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 183define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 184entry: 185 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 186 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 187 %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst 188 store i32 %val, i32 addrspace(1)* %out2 189 ret void 190} 191 192; GCN-LABEL: {{^}}atomic_and_i32: 193; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 194 195; GFX9: global_atomic_and v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} 196define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { 197entry: 198 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst 199 ret void 200} 201 202; GCN-LABEL: {{^}}atomic_and_i32_ret: 203; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 204; SIVI: buffer_store_dword [[RET]] 205 206; GFX9: global_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 207define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 208entry: 209 %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst 210 store i32 %val, i32 addrspace(1)* %out2 211 ret void 212} 213 214; GCN-LABEL: {{^}}atomic_and_i32_addr64: 215; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 216; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 217 218; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 219define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 220entry: 221 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 222 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst 223 ret void 224} 225 226; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64: 227; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 228; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 229; SIVI: buffer_store_dword [[RET]] 230 231; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 232define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 233entry: 234 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 235 %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst 236 store i32 %val, i32 addrspace(1)* %out2 237 ret void 238} 239 240; GCN-LABEL: {{^}}atomic_sub_i32_offset: 241; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 242 243; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 244define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { 245entry: 246 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 247 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 248 ret void 249} 250 251; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset: 252; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 253; SIVI: buffer_store_dword [[RET]] 254 255; GFX9: global_atomic_sub v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 256define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 257entry: 258 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 259 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 260 store i32 %val, i32 addrspace(1)* %out2 261 ret void 262} 263 264; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset: 265; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 266; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 267 268; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 269define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 270entry: 271 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 272 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 273 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 274 ret void 275} 276 277; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: 278; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 279; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 280; SIVI: buffer_store_dword [[RET]] 281 282; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 283define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 284entry: 285 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 286 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 287 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst 288 store i32 %val, i32 addrspace(1)* %out2 289 ret void 290} 291 292; GCN-LABEL: {{^}}atomic_sub_i32: 293; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 294 295; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} 296define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { 297entry: 298 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst 299 ret void 300} 301 302; GCN-LABEL: {{^}}atomic_sub_i32_ret: 303; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 304; SIVI: buffer_store_dword [[RET]] 305 306; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 307define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 308entry: 309 %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst 310 store i32 %val, i32 addrspace(1)* %out2 311 ret void 312} 313 314; GCN-LABEL: {{^}}atomic_sub_i32_addr64: 315; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 316; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 317 318; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} 319define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 320entry: 321 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 322 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst 323 ret void 324} 325 326; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64: 327; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 328; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 329; SIVI: buffer_store_dword [[RET]] 330 331; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 332define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 333entry: 334 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 335 %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst 336 store i32 %val, i32 addrspace(1)* %out2 337 ret void 338} 339 340; GCN-LABEL: {{^}}atomic_max_i32_offset: 341; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 342 343; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 344define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { 345entry: 346 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 347 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 348 ret void 349} 350 351; GCN-LABEL: {{^}}atomic_max_i32_ret_offset: 352; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 353; SIVI: buffer_store_dword [[RET]] 354 355; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 356define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 357entry: 358 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 359 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 360 store i32 %val, i32 addrspace(1)* %out2 361 ret void 362} 363 364; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset: 365; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 366; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 367 368; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 369define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 370entry: 371 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 372 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 373 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 374 ret void 375} 376 377; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: 378; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 379; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 380; SIVI: buffer_store_dword [[RET]] 381 382; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 383define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 384entry: 385 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 386 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 387 %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst 388 store i32 %val, i32 addrspace(1)* %out2 389 ret void 390} 391 392; GCN-LABEL: {{^}}atomic_max_i32: 393; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 394 395; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 396define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { 397entry: 398 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst 399 ret void 400} 401 402; GCN-LABEL: {{^}}atomic_max_i32_ret: 403; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 404; SIVI: buffer_store_dword [[RET]] 405 406; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 407define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 408entry: 409 %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst 410 store i32 %val, i32 addrspace(1)* %out2 411 ret void 412} 413 414; GCN-LABEL: {{^}}atomic_max_i32_addr64: 415; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 416; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 417 418; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 419define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 420entry: 421 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 422 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst 423 ret void 424} 425 426; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64: 427; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 428; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 429; SIVI: buffer_store_dword [[RET]] 430 431; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 432define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 433entry: 434 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 435 %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst 436 store i32 %val, i32 addrspace(1)* %out2 437 ret void 438} 439 440; GCN-LABEL: {{^}}atomic_umax_i32_offset: 441; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 442 443; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 444define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { 445entry: 446 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 447 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 448 ret void 449} 450 451; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset: 452; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 453; SIVI: buffer_store_dword [[RET]] 454 455; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 456define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 457entry: 458 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 459 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 460 store i32 %val, i32 addrspace(1)* %out2 461 ret void 462} 463 464; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset: 465; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 466; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 467; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 468define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 469entry: 470 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 471 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 472 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 473 ret void 474} 475 476; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: 477; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 478; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 479; SIVI: buffer_store_dword [[RET]] 480 481; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 482define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 483entry: 484 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 485 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 486 %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst 487 store i32 %val, i32 addrspace(1)* %out2 488 ret void 489} 490 491; GCN-LABEL: {{^}}atomic_umax_i32: 492; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 493 494; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 495define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { 496entry: 497 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst 498 ret void 499} 500 501; GCN-LABEL: {{^}}atomic_umax_i32_ret: 502; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 503; SIVI: buffer_store_dword [[RET]] 504 505; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 506define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 507entry: 508 %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst 509 store i32 %val, i32 addrspace(1)* %out2 510 ret void 511} 512 513; GCN-LABEL: {{^}}atomic_umax_i32_addr64: 514; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 515; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 516; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 517define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 518entry: 519 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 520 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst 521 ret void 522} 523 524; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64: 525; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 526; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 527; SIVI: buffer_store_dword [[RET]] 528 529; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 530define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 531entry: 532 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 533 %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst 534 store i32 %val, i32 addrspace(1)* %out2 535 ret void 536} 537 538; GCN-LABEL: {{^}}atomic_min_i32_offset: 539; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 540 541; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 542define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { 543entry: 544 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 545 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 546 ret void 547} 548 549; GCN-LABEL: {{^}}atomic_min_i32_ret_offset: 550; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 551; SIVI: buffer_store_dword [[RET]] 552 553; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 554define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 555entry: 556 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 557 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 558 store i32 %val, i32 addrspace(1)* %out2 559 ret void 560} 561 562; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset: 563; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 564; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 565; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 566define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 567entry: 568 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 569 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 570 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 571 ret void 572} 573 574; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: 575; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 576; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 577; SIVI: buffer_store_dword [[RET]] 578 579; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 580define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 581entry: 582 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 583 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 584 %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst 585 store i32 %val, i32 addrspace(1)* %out2 586 ret void 587} 588 589; GCN-LABEL: {{^}}atomic_min_i32: 590; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 591 592; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 593define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { 594entry: 595 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst 596 ret void 597} 598 599; GCN-LABEL: {{^}}atomic_min_i32_ret: 600; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 601; SIVI: buffer_store_dword [[RET]] 602 603; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 604define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 605entry: 606 %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst 607 store i32 %val, i32 addrspace(1)* %out2 608 ret void 609} 610 611; GCN-LABEL: {{^}}atomic_min_i32_addr64: 612; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 613; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 614; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 615define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 616entry: 617 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 618 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst 619 ret void 620} 621 622; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64: 623; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 624; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 625; SIVI: buffer_store_dword [[RET]] 626 627; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 628define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 629entry: 630 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 631 %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst 632 store i32 %val, i32 addrspace(1)* %out2 633 ret void 634} 635 636; GCN-LABEL: {{^}}atomic_umin_i32_offset: 637; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 638 639; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 640define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { 641entry: 642 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 643 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 644 ret void 645} 646 647; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset: 648; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 649; SIVI: buffer_store_dword [[RET]] 650 651; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 652define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 653entry: 654 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 655 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 656 store i32 %val, i32 addrspace(1)* %out2 657 ret void 658} 659 660; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset: 661; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 662; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 663; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 664define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 665entry: 666 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 667 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 668 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 669 ret void 670} 671 672; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: 673; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 674; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 675; SIVI: buffer_store_dword [[RET]] 676 677; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 678define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 679entry: 680 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 681 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 682 %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst 683 store i32 %val, i32 addrspace(1)* %out2 684 ret void 685} 686 687; GCN-LABEL: {{^}}atomic_umin_i32: 688; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 689; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 690define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { 691entry: 692 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst 693 ret void 694} 695 696; GCN-LABEL: {{^}}atomic_umin_i32_ret: 697; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 698; SIVI: buffer_store_dword [[RET]] 699 700; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 701define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 702entry: 703 %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst 704 store i32 %val, i32 addrspace(1)* %out2 705 ret void 706} 707 708; GCN-LABEL: {{^}}atomic_umin_i32_addr64: 709; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 710; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 711; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 712define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 713entry: 714 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 715 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst 716 ret void 717} 718 719; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64: 720; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 721; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 722; SIVI: buffer_store_dword [[RET]] 723 724; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 725define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 726entry: 727 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 728 %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst 729 store i32 %val, i32 addrspace(1)* %out2 730 ret void 731} 732 733; GCN-LABEL: {{^}}atomic_or_i32_offset: 734; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 735 736; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 737define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { 738entry: 739 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 740 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 741 ret void 742} 743 744; GCN-LABEL: {{^}}atomic_or_i32_ret_offset: 745; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 746; SIVI: buffer_store_dword [[RET]] 747 748; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 749define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 750entry: 751 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 752 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 753 store i32 %val, i32 addrspace(1)* %out2 754 ret void 755} 756 757; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset: 758; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 759; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 760; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 761define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 762entry: 763 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 764 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 765 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 766 ret void 767} 768 769; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: 770; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 771; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 772; SIVI: buffer_store_dword [[RET]] 773 774; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 775define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 776entry: 777 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 778 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 779 %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst 780 store i32 %val, i32 addrspace(1)* %out2 781 ret void 782} 783 784; GCN-LABEL: {{^}}atomic_or_i32: 785; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 786 787; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 788define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { 789entry: 790 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst 791 ret void 792} 793 794; GCN-LABEL: {{^}}atomic_or_i32_ret: 795; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 796; SIVI: buffer_store_dword [[RET]] 797 798; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 799define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 800entry: 801 %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst 802 store i32 %val, i32 addrspace(1)* %out2 803 ret void 804} 805 806; GCN-LABEL: {{^}}atomic_or_i32_addr64: 807; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 808; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 809; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 810define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 811entry: 812 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 813 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst 814 ret void 815} 816 817; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64: 818; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 819; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 820; SIVI: buffer_store_dword [[RET]] 821 822; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 823define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 824entry: 825 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 826 %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst 827 store i32 %val, i32 addrspace(1)* %out2 828 ret void 829} 830 831; GCN-LABEL: {{^}}atomic_xchg_i32_offset: 832; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 833 834; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 835define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { 836entry: 837 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 838 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 839 ret void 840} 841 842; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset: 843; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 844; SIVI: buffer_store_dword [[RET]] 845 846; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 847define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 848entry: 849 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 850 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 851 store i32 %val, i32 addrspace(1)* %out2 852 ret void 853} 854 855; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset: 856; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 857; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 858; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 859define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 860entry: 861 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 862 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 863 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 864 ret void 865} 866 867; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: 868; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 869; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 870; SIVI: buffer_store_dword [[RET]] 871 872; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 873define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 874entry: 875 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 876 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 877 %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst 878 store i32 %val, i32 addrspace(1)* %out2 879 ret void 880} 881 882; GCN-LABEL: {{^}}atomic_xchg_i32: 883; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 884; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 885define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { 886entry: 887 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst 888 ret void 889} 890 891; GCN-LABEL: {{^}}atomic_xchg_i32_ret: 892; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 893; SIVI: buffer_store_dword [[RET]] 894 895; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 896define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 897entry: 898 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst 899 store i32 %val, i32 addrspace(1)* %out2 900 ret void 901} 902 903; GCN-LABEL: {{^}}atomic_xchg_i32_addr64: 904; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 905; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 906; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 907define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 908entry: 909 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 910 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst 911 ret void 912} 913 914; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64: 915; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 916; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 917; SIVI: buffer_store_dword [[RET]] 918 919; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 920define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 921entry: 922 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 923 %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst 924 store i32 %val, i32 addrspace(1)* %out2 925 ret void 926} 927 928; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset: 929; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 930 931; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16{{$}} 932define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) { 933entry: 934 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 935 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 936 ret void 937} 938 939; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: 940; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 941; SIVI: buffer_store_dword v[[RET]] 942 943; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:16 glc{{$}} 944define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { 945entry: 946 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 947 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 948 %extract0 = extractvalue { i32, i1 } %val, 0 949 store i32 %extract0, i32 addrspace(1)* %out2 950 ret void 951} 952 953; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: 954; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 955 956; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 957; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}} 958define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { 959entry: 960 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 961 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 962 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 963 ret void 964} 965 966; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset: 967; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 968; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 969; SIVI: buffer_store_dword v[[RET]] 970 971; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}} 972define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) { 973entry: 974 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 975 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 976 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 977 %extract0 = extractvalue { i32, i1 } %val, 0 978 store i32 %extract0, i32 addrspace(1)* %out2 979 ret void 980} 981 982; GCN-LABEL: {{^}}atomic_cmpxchg_i32: 983; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 984 985; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} 986define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) { 987entry: 988 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst 989 ret void 990} 991 992; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret: 993; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 994; SIVI: buffer_store_dword v[[RET]] 995 996; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off glc{{$}} 997define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { 998entry: 999 %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst 1000 %extract0 = extractvalue { i32, i1 } %val, 0 1001 store i32 %extract0, i32 addrspace(1)* %out2 1002 ret void 1003} 1004 1005; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64: 1006; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1007; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 1008; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} 1009define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { 1010entry: 1011 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1012 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst 1013 ret void 1014} 1015 1016; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: 1017; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1018; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1019; SIVI: buffer_store_dword v[[RET]] 1020 1021; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}} 1022define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) { 1023entry: 1024 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1025 %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst 1026 %extract0 = extractvalue { i32, i1 } %val, 0 1027 store i32 %extract0, i32 addrspace(1)* %out2 1028 ret void 1029} 1030 1031; GCN-LABEL: {{^}}atomic_xor_i32_offset: 1032; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1033 1034; GFX9: global_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 1035define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { 1036entry: 1037 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 1038 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1039 ret void 1040} 1041 1042; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset: 1043; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1044; SIVI: buffer_store_dword [[RET]] 1045 1046; GFX9: global_atomic_xor v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 1047define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 1048entry: 1049 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 1050 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1051 store i32 %val, i32 addrspace(1)* %out2 1052 ret void 1053} 1054 1055; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: 1056; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 1057; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 1058; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} 1059define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { 1060entry: 1061 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1062 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1063 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1064 ret void 1065} 1066 1067; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: 1068; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 1069; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 1070; SIVI: buffer_store_dword [[RET]] 1071 1072; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}} 1073define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 1074entry: 1075 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1076 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1077 %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst 1078 store i32 %val, i32 addrspace(1)* %out2 1079 ret void 1080} 1081 1082; GCN-LABEL: {{^}}atomic_xor_i32: 1083; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1084; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 1085define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { 1086entry: 1087 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst 1088 ret void 1089} 1090 1091; GCN-LABEL: {{^}}atomic_xor_i32_ret: 1092; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 1093; SIVI: buffer_store_dword [[RET]] 1094 1095; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 1096define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { 1097entry: 1098 %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst 1099 store i32 %val, i32 addrspace(1)* %out2 1100 ret void 1101} 1102 1103; GCN-LABEL: {{^}}atomic_xor_i32_addr64: 1104; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1105; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} 1106; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}} 1107define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { 1108entry: 1109 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1110 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst 1111 ret void 1112} 1113 1114; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64: 1115; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1116; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} 1117; SIVI: buffer_store_dword [[RET]] 1118 1119; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}} 1120define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { 1121entry: 1122 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1123 %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst 1124 store i32 %val, i32 addrspace(1)* %out2 1125 ret void 1126} 1127 1128; GCN-LABEL: {{^}}atomic_load_i32_offset: 1129; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 1130; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} 1131; SIVI: buffer_store_dword [[RET]] 1132 1133; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}} 1134define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 1135entry: 1136 %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4 1137 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4 1138 store i32 %val, i32 addrspace(1)* %out 1139 ret void 1140} 1141 1142; GCN-LABEL: {{^}}atomic_load_i32: 1143; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 1144; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc 1145; SIVI: buffer_store_dword [[RET]] 1146 1147; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off glc 1148define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 1149entry: 1150 %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4 1151 store i32 %val, i32 addrspace(1)* %out 1152 ret void 1153} 1154 1155; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset: 1156; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} 1157; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1158; SIVI: buffer_store_dword [[RET]] 1159 1160; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}} 1161define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) { 1162entry: 1163 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index 1164 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1165 %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4 1166 store i32 %val, i32 addrspace(1)* %out 1167 ret void 1168} 1169 1170; GCN-LABEL: {{^}}atomic_load_i32_addr64: 1171; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 1172; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1173; SIVI: buffer_store_dword [[RET]] 1174 1175; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}} 1176define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) { 1177entry: 1178 %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index 1179 %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4 1180 store i32 %val, i32 addrspace(1)* %out 1181 ret void 1182} 1183 1184; GCN-LABEL: {{^}}atomic_store_i32_offset: 1185; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} 1186; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1187; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}} 1188define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) { 1189entry: 1190 %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 1191 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 1192 ret void 1193} 1194 1195; GCN-LABEL: {{^}}atomic_store_i32: 1196; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1197; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1198; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}} 1199define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) { 1200entry: 1201 store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 1202 ret void 1203} 1204 1205; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: 1206; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} 1207; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1208; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}} 1209define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) { 1210entry: 1211 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1212 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 1213 store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 1214 ret void 1215} 1216 1217; GCN-LABEL: {{^}}atomic_store_i32_addr64: 1218; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1219; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}} 1220; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}} 1221define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) { 1222entry: 1223 %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index 1224 store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4 1225 ret void 1226} 1227