; RUN: llc -march=amdgcn -mcpu=bonaire -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Tests i64 global-memory atomicrmw lowering: CI/VI select buffer/flat
; atomics, GFX9 selects global_atomic_* with an immediate offset.

; GCN-LABEL: {{^}}atomic_add_i64_offset:
; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}

; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64:
; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret:
; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_offset:
; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64:
; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret:
; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64:
; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_offset:
; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64:
; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret:
; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64:
; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_offset:
; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64:
; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret:
; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}

; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI:
buffer_store_dwordx2 [[RET]] 596 597; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} 598define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 599entry: 600 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 601 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst 602 store i64 %tmp0, i64 addrspace(1)* %out2 603 ret void 604} 605 606; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset: 607; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} 608; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 609; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}} 610define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { 611entry: 612 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 613 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 614 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst 615 ret void 616} 617 618; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset: 619; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} 620; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 621; CIVI: buffer_store_dwordx2 [[RET]] 622 623; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}} 624define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 625entry: 626 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 627 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 628 %tmp0 = atomicrmw volatile umin i64 
addrspace(1)* %gep, i64 %in seq_cst 629 store i64 %tmp0, i64 addrspace(1)* %out2 630 ret void 631} 632 633; GCN-LABEL: {{^}}atomic_umin_i64: 634; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 635; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]$}} 636define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) { 637entry: 638 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst 639 ret void 640} 641 642; GCN-LABEL: {{^}}atomic_umin_i64_ret: 643; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 644; CIVI: buffer_store_dwordx2 [[RET]] 645 646; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}} 647define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 648entry: 649 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst 650 store i64 %tmp0, i64 addrspace(1)* %out2 651 ret void 652} 653 654; GCN-LABEL: {{^}}atomic_umin_i64_addr64: 655; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 656; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 657; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}} 658define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { 659entry: 660 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 661 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst 662 ret void 663} 664 665; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64: 666; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 667; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 668; CIVI: 
buffer_store_dwordx2 [[RET]] 669 670; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}} 671define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 672entry: 673 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 674 %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst 675 store i64 %tmp0, i64 addrspace(1)* %out2 676 ret void 677} 678 679; GCN-LABEL: {{^}}atomic_or_i64_offset: 680; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} 681; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} 682define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) { 683entry: 684 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 685 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst 686 ret void 687} 688 689; GCN-LABEL: {{^}}atomic_or_i64_ret_offset: 690; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} 691; CIVI: buffer_store_dwordx2 [[RET]] 692 693; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} 694define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 695entry: 696 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 697 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst 698 store i64 %tmp0, i64 addrspace(1)* %out2 699 ret void 700} 701 702; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset: 703; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} 704; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 705; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off 
offset:32{{$}} 706define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { 707entry: 708 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 709 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 710 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst 711 ret void 712} 713 714; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset: 715; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} 716; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 717; CIVI: buffer_store_dwordx2 [[RET]] 718 719; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}} 720define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 721entry: 722 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 723 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 724 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst 725 store i64 %tmp0, i64 addrspace(1)* %out2 726 ret void 727} 728 729; GCN-LABEL: {{^}}atomic_or_i64: 730; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 731; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}} 732define amdgpu_kernel void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) { 733entry: 734 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst 735 ret void 736} 737 738; GCN-LABEL: {{^}}atomic_or_i64_ret: 739; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 740; CIVI: buffer_store_dwordx2 [[RET]] 741 742; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}} 743define amdgpu_kernel void 
@atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 744entry: 745 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst 746 store i64 %tmp0, i64 addrspace(1)* %out2 747 ret void 748} 749 750; GCN-LABEL: {{^}}atomic_or_i64_addr64: 751; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 752; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 753; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}} 754define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { 755entry: 756 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 757 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst 758 ret void 759} 760 761; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64: 762; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 763; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 764; CIVI: buffer_store_dwordx2 [[RET]] 765 766; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}} 767define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 768entry: 769 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 770 %tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst 771 store i64 %tmp0, i64 addrspace(1)* %out2 772 ret void 773} 774 775; GCN-LABEL: {{^}}atomic_xchg_i64_offset: 776; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} 777 778; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} 779define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) { 780entry: 781 %gep = getelementptr i64, i64 
addrspace(1)* %out, i64 4 782 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst 783 ret void 784} 785 786; GCN-LABEL: {{^}}atomic_xchg_f64_offset: 787; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} 788 789; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} 790define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) { 791entry: 792 %gep = getelementptr double, double addrspace(1)* %out, i64 4 793 %tmp0 = atomicrmw volatile xchg double addrspace(1)* %gep, double %in seq_cst 794 ret void 795} 796 797; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: 798; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} 799; CIVI: buffer_store_dwordx2 [[RET]] 800 801; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} 802define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 803entry: 804 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 805 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst 806 store i64 %tmp0, i64 addrspace(1)* %out2 807 ret void 808} 809 810; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset: 811; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} 812; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} 813; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}} 814define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { 815entry: 816 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 817 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 818 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 
%in seq_cst 819 ret void 820} 821 822; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset: 823; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} 824; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 825; CIVI: buffer_store_dwordx2 [[RET]] 826 827; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}} 828define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 829entry: 830 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 831 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 832 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst 833 store i64 %tmp0, i64 addrspace(1)* %out2 834 ret void 835} 836 837; GCN-LABEL: {{^}}atomic_xchg_i64: 838; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 839; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}} 840define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) { 841entry: 842 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst 843 ret void 844} 845 846; GCN-LABEL: {{^}}atomic_xchg_i64_ret: 847; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 848; CIVI: buffer_store_dwordx2 [[RET]] 849 850; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}} 851define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 852entry: 853 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst 854 store i64 %tmp0, i64 addrspace(1)* %out2 855 ret void 856} 857 858; GCN-LABEL: {{^}}atomic_xchg_i64_addr64: 859; CI: buffer_atomic_swap_x2 
v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 860; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 861; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}} 862define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { 863entry: 864 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 865 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst 866 ret void 867} 868 869; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64: 870; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 871; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 872; CIVI: buffer_store_dwordx2 [[RET]] 873 874; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}} 875define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 876entry: 877 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 878 %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst 879 store i64 %tmp0, i64 addrspace(1)* %out2 880 ret void 881} 882 883; GCN-LABEL: {{^}}atomic_xor_i64_offset: 884; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} 885; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} 886define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) { 887entry: 888 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 889 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst 890 ret void 891} 892 893; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset: 894; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} 895; 
CIVI: buffer_store_dwordx2 [[RET]] 896 897; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} 898define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 899entry: 900 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 901 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst 902 store i64 %tmp0, i64 addrspace(1)* %out2 903 ret void 904} 905 906; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset: 907; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} 908; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 909; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}} 910define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { 911entry: 912 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 913 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 914 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst 915 ret void 916} 917 918; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset: 919; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} 920; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 921; CIVI: buffer_store_dwordx2 [[RET]] 922 923; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}} 924define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 925entry: 926 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 927 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 928 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, 
i64 %in seq_cst 929 store i64 %tmp0, i64 addrspace(1)* %out2 930 ret void 931} 932 933; GCN-LABEL: {{^}}atomic_xor_i64: 934; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 935; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}} 936define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) { 937entry: 938 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst 939 ret void 940} 941 942; GCN-LABEL: {{^}}atomic_xor_i64_ret: 943; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 944; CIVI: buffer_store_dwordx2 [[RET]] 945 946; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}} 947define amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { 948entry: 949 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst 950 store i64 %tmp0, i64 addrspace(1)* %out2 951 ret void 952} 953 954; GCN-LABEL: {{^}}atomic_xor_i64_addr64: 955; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 956; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 957; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}} 958define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { 959entry: 960 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 961 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst 962 ret void 963} 964 965; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64: 966; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} 967; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} 968; CIVI: buffer_store_dwordx2 [[RET]] 969 970; 
GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}} 971define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { 972entry: 973 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 974 %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst 975 store i64 %tmp0, i64 addrspace(1)* %out2 976 ret void 977} 978 979 980; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset: 981; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} 982; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} 983define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(1)* %out, i64 %in, i64 %old) { 984entry: 985 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 986 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst 987 ret void 988} 989 990; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset: 991; CIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x11940 992; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} 993 994; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0x11000{{$}} 995; GFX9: global_atomic_cmpswap_x2 [[VOFFSET]], v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:2368{{$}} 996define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) { 997entry: 998 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 9000 999 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst 1000 ret void 1001} 1002 1003; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset: 1004; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} 1005; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: 1006 1007; GFX9: global_atomic_cmpswap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 
v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} 1008define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) { 1009entry: 1010 %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 1011 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst 1012 %extract0 = extractvalue { i64, i1 } %val, 0 1013 store i64 %extract0, i64 addrspace(1)* %out2 1014 ret void 1015} 1016 1017; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset: 1018; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} 1019; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} 1020; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:32{{$}} 1021define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) { 1022entry: 1023 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 1024 %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4 1025 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst 1026 ret void 1027} 1028 1029; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset: 1030; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} 1031; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} 1032; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: 1033 1034; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}} 1035define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) { 1036entry: 1037 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 1038 %gep = 
getelementptr i64, i64 addrspace(1)* %ptr, i64 4 1039 %val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst 1040 %extract0 = extractvalue { i64, i1 } %val, 0 1041 store i64 %extract0, i64 addrspace(1)* %out2 1042 ret void 1043} 1044 1045; GCN-LABEL: {{^}}atomic_cmpxchg_i64: 1046; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 1047; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}} 1048define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(1)* %out, i64 %in, i64 %old) { 1049entry: 1050 %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst 1051 ret void 1052} 1053 1054; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret: 1055; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc 1056; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: 1057 1058; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}} 1059define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) { 1060entry: 1061 %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst 1062 %extract0 = extractvalue { i64, i1 } %val, 0 1063 store i64 %extract0, i64 addrspace(1)* %out2 1064 ret void 1065} 1066 1067; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64: 1068; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} 1069; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} 1070; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} 1071define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) { 1072entry: 1073 %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 1074 %val = cmpxchg volatile i64 addrspace(1)* 
%ptr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4
  %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_neg_offset:
; CI: v_mov_b32_e32 v[[LO:[0-9]+]], 0xffffffe0
; CI: v_mov_b32_e32 v[[HI:[0-9]+]], -1
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffffffe0
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_neg_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %in, i64 -4
  %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
  %val = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  %val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}
define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
  %val = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
  store i64 %val, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double addrspace(1)* %in, i64 %index
  %gep = getelementptr double, double addrspace(1)* %ptr, i64 4
  %val = load atomic double, double addrspace(1)* %gep seq_cst, align 8
  store double %val, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_offset:
; CI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
entry:
  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
entry:
  store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off offset:32{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
  store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off offset:32{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double addrspace(1)* %out, i64 %index
  %gep = getelementptr double, double addrspace(1)* %ptr, i64 4
  store atomic double %in, double addrspace(1)* %gep seq_cst, align 8
  ret void
}