; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}atomic_add_i32_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_max_offset:
; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 1023
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1:
; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 1024
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; CIVI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32:
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; CMP_SWAP

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) {
entry:
  %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]]
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst
  %flag = extractvalue { i32, i1 } %val, 0
  store i32 %flag, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32:
; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}],
v{{[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst
  store i32 %val, i32* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) {
entry:
  %gep = getelementptr i32, i32* %in, i32 4
  %val = load atomic i32, i32* %gep seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) {
entry:
  %val = load atomic i32, i32* %in seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %in, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  %val = load atomic i32, i32* %gep seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %in, i64 %index
  %val = load atomic i32, i32* %ptr seq_cst, align 4
  store i32 %val, i32* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) {
entry:
  %gep = getelementptr i32, i32* %out, i32 4
  store atomic i32 %in, i32* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) {
entry:
  store atomic i32 %in, i32* %out seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  %gep = getelementptr i32, i32* %ptr, i32 4
  store atomic i32 %in, i32* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32* %out, i64 %index
  store atomic i32 %in, i32* %ptr seq_cst, align 4
  ret void
}