; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s

; Tests for 64-bit LDS (local, addrspace(3)) atomicrmw operations.
; SI/CI/VI must initialize m0 before DS instructions; gfx9 must not.

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
;   store i64 %result, i64 addrspace(1)* %out, align 8
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  store i64 %result, i64 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i64 4 seq_cst
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
  ret void
}