; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s

; Atomic loads, stores and atomicrmw xchg on global memory (addrspace(1)) with
; no explicit syncscope, compiled for gfx600, gfx700, gfx1010 (default and
; +cumode) and gfx700/amdpal with cache invalidations skipped.
; The check lines are generated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Unordered atomic load: every configuration expects a plain load (no glc/dlc,
; no cache invalidate).
define amdgpu_kernel void @global_system_unordered_load(
; GFX6-LABEL: global_system_unordered_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s2
; GFX6-NEXT: s_mov_b32 s7, s3
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_unordered_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_load_dword v0, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[2:3], v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_unordered_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_unordered_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1]
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_unordered_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in unordered, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Monotonic atomic load: the load is expected to carry glc (glc dlc on
; gfx1010); no cache invalidate follows it.
define amdgpu_kernel void @global_system_monotonic_load(
; GFX6-LABEL: global_system_monotonic_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s2
; GFX6-NEXT: s_mov_b32 s7, s3
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_monotonic_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_load_dword v0, v[0:1] glc
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[2:3], v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_monotonic_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_monotonic_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_monotonic_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in monotonic, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Acquire atomic load: glc load, then a vmcnt wait and a cache invalidate
; (buffer_wbinvl1, buffer_wbinvl1_vol, or buffer_gl0_inv + buffer_gl1_inv).
; With cache invalidations skipped only the wait remains.
define amdgpu_kernel void @global_system_acquire_load(
; GFX6-LABEL: global_system_acquire_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s2
; GFX6-NEXT: s_mov_b32 s7, s3
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acquire_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_load_dword v0, v[0:1] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: flat_store_dword v[2:3], v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acquire_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acquire_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acquire_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in acquire, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Seq_cst atomic load: same shape as the acquire load, with an additional
; s_waitcnt (plus s_waitcnt_vscnt on gfx1010) placed before the load.
define amdgpu_kernel void @global_system_seq_cst_load(
; GFX6-LABEL: global_system_seq_cst_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s2
; GFX6-NEXT: s_mov_b32 s7, s3
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_seq_cst_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_load_dword v0, v[0:1] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v3, s3
; GFX7-NEXT: flat_store_dword v[2:3], v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_seq_cst_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_seq_cst_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4
; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Unordered atomic store: every configuration expects a plain store with no
; extra wait in front of it.
define amdgpu_kernel void @global_system_unordered_store(
; GFX6-LABEL: global_system_unordered_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_unordered_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_unordered_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_unordered_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_unordered_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out unordered, align 4
  ret void
}

; Monotonic atomic store: expected output matches the unordered store.
define amdgpu_kernel void @global_system_monotonic_store(
; GFX6-LABEL: global_system_monotonic_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_monotonic_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_monotonic_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_monotonic_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_monotonic_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out monotonic, align 4
  ret void
}

; Release atomic store: an s_waitcnt vmcnt(0) lgkmcnt(0) (plus s_waitcnt_vscnt
; on gfx1010) is expected immediately before the store.
define amdgpu_kernel void @global_system_release_store(
; GFX6-LABEL: global_system_release_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_release_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_release_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_release_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_release_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out release, align 4
  ret void
}

; Seq_cst atomic store: expected output matches the release store.
define amdgpu_kernel void @global_system_seq_cst_store(
; GFX6-LABEL: global_system_seq_cst_store:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_seq_cst_store:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_seq_cst_store:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_seq_cst_store:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
  ret void
}

; Monotonic atomicrmw xchg (result unused): a bare atomic swap, with no wait
; or cache invalidate around it.
define amdgpu_kernel void @global_system_monotonic_atomicrmw(
; GFX6-LABEL: global_system_monotonic_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_monotonic_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_monotonic_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_monotonic_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in monotonic
  ret void
}

; Acquire atomicrmw xchg (result unused): the swap is followed by a wait and a
; cache invalidate; with cache invalidations skipped only the wait remains.
define amdgpu_kernel void @global_system_acquire_atomicrmw(
; GFX6-LABEL: global_system_acquire_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acquire_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acquire_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acquire_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acquire
  ret void
}

; Release atomicrmw xchg (result unused): a wait (plus s_waitcnt_vscnt on
; gfx1010) is expected before the swap and nothing after it.
define amdgpu_kernel void @global_system_release_atomicrmw(
; GFX6-LABEL: global_system_release_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_release_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_release_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_release_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in release
  ret void
}

; Acq_rel atomicrmw xchg (result unused): combines the release wait before the
; swap with the acquire wait and cache invalidate after it.
define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
; GFX6-LABEL: global_system_acq_rel_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_acq_rel_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_acq_rel_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_acq_rel_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acq_rel
  ret void
}

; Seq_cst atomicrmw xchg (result unused): expected output matches acq_rel.
define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
; GFX6-LABEL: global_system_seq_cst_atomicrmw:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: global_system_seq_cst_atomicrmw:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_atomic_swap v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: global_system_seq_cst_atomicrmw:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_clause 0x1
; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: global_system_seq_cst_atomicrmw:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_clause 0x1
; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000
; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

944define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( 945; GFX6-LABEL: global_system_acquire_ret_atomicrmw: 946; GFX6: ; %bb.0: ; %entry 947; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 948; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 949; GFX6-NEXT: s_mov_b32 s7,
0xf000 950; GFX6-NEXT: s_mov_b32 s6, -1 951; GFX6-NEXT: s_waitcnt lgkmcnt(0) 952; GFX6-NEXT: v_mov_b32_e32 v0, s0 953; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 954; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 955; GFX6-NEXT: buffer_wbinvl1 956; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 957; GFX6-NEXT: s_endpgm 958; 959; GFX7-LABEL: global_system_acquire_ret_atomicrmw: 960; GFX7: ; %bb.0: ; %entry 961; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 962; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 963; GFX7-NEXT: s_waitcnt lgkmcnt(0) 964; GFX7-NEXT: v_mov_b32_e32 v0, s0 965; GFX7-NEXT: v_mov_b32_e32 v1, s1 966; GFX7-NEXT: v_mov_b32_e32 v2, s2 967; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 968; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 969; GFX7-NEXT: buffer_wbinvl1_vol 970; GFX7-NEXT: flat_store_dword v[0:1], v2 971; GFX7-NEXT: s_endpgm 972; 973; GFX10-WGP-LABEL: global_system_acquire_ret_atomicrmw: 974; GFX10-WGP: ; %bb.0: ; %entry 975; GFX10-WGP-NEXT: s_clause 0x1 976; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 977; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 978; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 979; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 980; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 981; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 982; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 983; GFX10-WGP-NEXT: buffer_gl0_inv 984; GFX10-WGP-NEXT: buffer_gl1_inv 985; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 986; GFX10-WGP-NEXT: s_endpgm 987; 988; GFX10-CU-LABEL: global_system_acquire_ret_atomicrmw: 989; GFX10-CU: ; %bb.0: ; %entry 990; GFX10-CU-NEXT: s_clause 0x1 991; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 992; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 993; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 994; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 995; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 996; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 997; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 998; GFX10-CU-NEXT: buffer_gl0_inv 
999; GFX10-CU-NEXT: buffer_gl1_inv 1000; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 1001; GFX10-CU-NEXT: s_endpgm 1002; 1003; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw: 1004; SKIP-CACHE-INV: ; %bb.0: ; %entry 1005; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1006; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1007; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1008; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1009; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1010; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1011; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1012; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1013; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 1014; SKIP-CACHE-INV-NEXT: s_endpgm 1015 i32 addrspace(1)* %out, i32 %in) { 1016entry: 1017 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acquire 1018 store i32 %val, i32 addrspace(1)* %out, align 4 1019 ret void 1020} 1021 1022define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( 1023; GFX6-LABEL: global_system_acq_rel_ret_atomicrmw: 1024; GFX6: ; %bb.0: ; %entry 1025; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1026; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 1027; GFX6-NEXT: s_mov_b32 s7, 0xf000 1028; GFX6-NEXT: s_mov_b32 s6, -1 1029; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1030; GFX6-NEXT: v_mov_b32_e32 v0, s0 1031; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1032; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1033; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1034; GFX6-NEXT: buffer_wbinvl1 1035; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1036; GFX6-NEXT: s_endpgm 1037; 1038; GFX7-LABEL: global_system_acq_rel_ret_atomicrmw: 1039; GFX7: ; %bb.0: ; %entry 1040; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1041; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1042; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1043; GFX7-NEXT: v_mov_b32_e32 v0, s0 1044; GFX7-NEXT: v_mov_b32_e32 v1, s1 1045; GFX7-NEXT: v_mov_b32_e32 v2, s2 1046; GFX7-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 1047; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 1048; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1049; GFX7-NEXT: buffer_wbinvl1_vol 1050; GFX7-NEXT: flat_store_dword v[0:1], v2 1051; GFX7-NEXT: s_endpgm 1052; 1053; GFX10-WGP-LABEL: global_system_acq_rel_ret_atomicrmw: 1054; GFX10-WGP: ; %bb.0: ; %entry 1055; GFX10-WGP-NEXT: s_clause 0x1 1056; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1057; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1058; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1059; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1060; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1061; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1062; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1063; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1064; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1065; GFX10-WGP-NEXT: buffer_gl0_inv 1066; GFX10-WGP-NEXT: buffer_gl1_inv 1067; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 1068; GFX10-WGP-NEXT: s_endpgm 1069; 1070; GFX10-CU-LABEL: global_system_acq_rel_ret_atomicrmw: 1071; GFX10-CU: ; %bb.0: ; %entry 1072; GFX10-CU-NEXT: s_clause 0x1 1073; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1074; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1075; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1076; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1077; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1078; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1079; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1080; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1081; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1082; GFX10-CU-NEXT: buffer_gl0_inv 1083; GFX10-CU-NEXT: buffer_gl1_inv 1084; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 1085; GFX10-CU-NEXT: s_endpgm 1086; 1087; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw: 1088; SKIP-CACHE-INV: ; %bb.0: ; %entry 1089; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1090; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1091; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 
1092; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1093; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1094; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1095; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1096; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1097; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1098; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 1099; SKIP-CACHE-INV-NEXT: s_endpgm 1100 i32 addrspace(1)* %out, i32 %in) { 1101entry: 1102 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in acq_rel 1103 store i32 %val, i32 addrspace(1)* %out, align 4 1104 ret void 1105} 1106 1107define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( 1108; GFX6-LABEL: global_system_seq_cst_ret_atomicrmw: 1109; GFX6: ; %bb.0: ; %entry 1110; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1111; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 1112; GFX6-NEXT: s_mov_b32 s7, 0xf000 1113; GFX6-NEXT: s_mov_b32 s6, -1 1114; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1115; GFX6-NEXT: v_mov_b32_e32 v0, s0 1116; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1117; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1118; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1119; GFX6-NEXT: buffer_wbinvl1 1120; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 1121; GFX6-NEXT: s_endpgm 1122; 1123; GFX7-LABEL: global_system_seq_cst_ret_atomicrmw: 1124; GFX7: ; %bb.0: ; %entry 1125; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1126; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 1127; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1128; GFX7-NEXT: v_mov_b32_e32 v0, s0 1129; GFX7-NEXT: v_mov_b32_e32 v1, s1 1130; GFX7-NEXT: v_mov_b32_e32 v2, s2 1131; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1132; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 1133; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1134; GFX7-NEXT: buffer_wbinvl1_vol 1135; GFX7-NEXT: flat_store_dword v[0:1], v2 1136; GFX7-NEXT: s_endpgm 1137; 1138; GFX10-WGP-LABEL: global_system_seq_cst_ret_atomicrmw: 1139; GFX10-WGP: ; %bb.0: ; %entry 1140; 
GFX10-WGP-NEXT: s_clause 0x1 1141; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 1142; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1143; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 1144; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1145; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1146; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1147; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1148; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1149; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1150; GFX10-WGP-NEXT: buffer_gl0_inv 1151; GFX10-WGP-NEXT: buffer_gl1_inv 1152; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 1153; GFX10-WGP-NEXT: s_endpgm 1154; 1155; GFX10-CU-LABEL: global_system_seq_cst_ret_atomicrmw: 1156; GFX10-CU: ; %bb.0: ; %entry 1157; GFX10-CU-NEXT: s_clause 0x1 1158; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 1159; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1160; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 1161; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1162; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1163; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1164; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1165; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 1166; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1167; GFX10-CU-NEXT: buffer_gl0_inv 1168; GFX10-CU-NEXT: buffer_gl1_inv 1169; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 1170; GFX10-CU-NEXT: s_endpgm 1171; 1172; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw: 1173; SKIP-CACHE-INV: ; %bb.0: ; %entry 1174; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1175; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1176; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1177; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1178; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1179; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1180; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1181; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 1182; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1183; 
SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 1184; SKIP-CACHE-INV-NEXT: s_endpgm 1185 i32 addrspace(1)* %out, i32 %in) { 1186entry: 1187 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst 1188 store i32 %val, i32 addrspace(1)* %out, align 4 1189 ret void 1190} 1191 1192define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( 1193; GFX6-LABEL: global_system_monotonic_monotonic_cmpxchg: 1194; GFX6: ; %bb.0: ; %entry 1195; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1196; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1197; GFX6-NEXT: s_mov_b32 s7, 0xf000 1198; GFX6-NEXT: s_mov_b32 s6, -1 1199; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1200; GFX6-NEXT: v_mov_b32_e32 v0, s0 1201; GFX6-NEXT: v_mov_b32_e32 v1, s1 1202; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1203; GFX6-NEXT: s_endpgm 1204; 1205; GFX7-LABEL: global_system_monotonic_monotonic_cmpxchg: 1206; GFX7: ; %bb.0: ; %entry 1207; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1208; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1209; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1210; GFX7-NEXT: s_add_u32 s0, s0, 16 1211; GFX7-NEXT: s_addc_u32 s1, s1, 0 1212; GFX7-NEXT: v_mov_b32_e32 v0, s0 1213; GFX7-NEXT: v_mov_b32_e32 v2, s2 1214; GFX7-NEXT: v_mov_b32_e32 v1, s1 1215; GFX7-NEXT: v_mov_b32_e32 v3, s3 1216; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1217; GFX7-NEXT: s_endpgm 1218; 1219; GFX10-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg: 1220; GFX10-WGP: ; %bb.0: ; %entry 1221; GFX10-WGP-NEXT: s_clause 0x1 1222; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1223; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1224; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1225; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1226; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1227; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1228; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1229; GFX10-WGP-NEXT: s_endpgm 1230; 1231; GFX10-CU-LABEL: global_system_monotonic_monotonic_cmpxchg: 
1232; GFX10-CU: ; %bb.0: ; %entry 1233; GFX10-CU-NEXT: s_clause 0x1 1234; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1235; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1236; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1237; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1238; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1239; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1240; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1241; GFX10-CU-NEXT: s_endpgm 1242; 1243; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg: 1244; SKIP-CACHE-INV: ; %bb.0: ; %entry 1245; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1246; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1247; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1248; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1249; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1250; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1251; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1252; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1253; SKIP-CACHE-INV-NEXT: s_endpgm 1254 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1255entry: 1256 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1257 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in monotonic monotonic 1258 ret void 1259} 1260 1261define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( 1262; GFX6-LABEL: global_system_acquire_monotonic_cmpxchg: 1263; GFX6: ; %bb.0: ; %entry 1264; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1265; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1266; GFX6-NEXT: s_mov_b32 s7, 0xf000 1267; GFX6-NEXT: s_mov_b32 s6, -1 1268; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1269; GFX6-NEXT: v_mov_b32_e32 v0, s0 1270; GFX6-NEXT: v_mov_b32_e32 v1, s1 1271; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1272; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1273; GFX6-NEXT: buffer_wbinvl1 1274; GFX6-NEXT: s_endpgm 1275; 1276; GFX7-LABEL: global_system_acquire_monotonic_cmpxchg: 1277; GFX7: ; %bb.0: ; 
%entry 1278; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1279; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1280; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1281; GFX7-NEXT: s_add_u32 s0, s0, 16 1282; GFX7-NEXT: s_addc_u32 s1, s1, 0 1283; GFX7-NEXT: v_mov_b32_e32 v0, s0 1284; GFX7-NEXT: v_mov_b32_e32 v2, s2 1285; GFX7-NEXT: v_mov_b32_e32 v1, s1 1286; GFX7-NEXT: v_mov_b32_e32 v3, s3 1287; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1288; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1289; GFX7-NEXT: buffer_wbinvl1_vol 1290; GFX7-NEXT: s_endpgm 1291; 1292; GFX10-WGP-LABEL: global_system_acquire_monotonic_cmpxchg: 1293; GFX10-WGP: ; %bb.0: ; %entry 1294; GFX10-WGP-NEXT: s_clause 0x1 1295; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1296; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1297; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1298; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1299; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1300; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1301; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1302; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1303; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1304; GFX10-WGP-NEXT: buffer_gl0_inv 1305; GFX10-WGP-NEXT: buffer_gl1_inv 1306; GFX10-WGP-NEXT: s_endpgm 1307; 1308; GFX10-CU-LABEL: global_system_acquire_monotonic_cmpxchg: 1309; GFX10-CU: ; %bb.0: ; %entry 1310; GFX10-CU-NEXT: s_clause 0x1 1311; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1312; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1313; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1314; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1315; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1316; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1317; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1318; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1319; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1320; GFX10-CU-NEXT: buffer_gl0_inv 1321; GFX10-CU-NEXT: buffer_gl1_inv 1322; GFX10-CU-NEXT: s_endpgm 1323; 1324; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg: 1325; 
SKIP-CACHE-INV: ; %bb.0: ; %entry 1326; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1327; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1328; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1329; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1330; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1331; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1332; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1333; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1334; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1335; SKIP-CACHE-INV-NEXT: s_endpgm 1336 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1337entry: 1338 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1339 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire monotonic 1340 ret void 1341} 1342 1343define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( 1344; GFX6-LABEL: global_system_release_monotonic_cmpxchg: 1345; GFX6: ; %bb.0: ; %entry 1346; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1347; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1348; GFX6-NEXT: s_mov_b32 s7, 0xf000 1349; GFX6-NEXT: s_mov_b32 s6, -1 1350; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1351; GFX6-NEXT: v_mov_b32_e32 v0, s0 1352; GFX6-NEXT: v_mov_b32_e32 v1, s1 1353; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1354; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1355; GFX6-NEXT: s_endpgm 1356; 1357; GFX7-LABEL: global_system_release_monotonic_cmpxchg: 1358; GFX7: ; %bb.0: ; %entry 1359; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1360; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1361; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1362; GFX7-NEXT: s_add_u32 s0, s0, 16 1363; GFX7-NEXT: s_addc_u32 s1, s1, 0 1364; GFX7-NEXT: v_mov_b32_e32 v0, s0 1365; GFX7-NEXT: v_mov_b32_e32 v2, s2 1366; GFX7-NEXT: v_mov_b32_e32 v1, s1 1367; GFX7-NEXT: v_mov_b32_e32 v3, s3 1368; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1369; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1370; GFX7-NEXT: s_endpgm 1371; 
1372; GFX10-WGP-LABEL: global_system_release_monotonic_cmpxchg: 1373; GFX10-WGP: ; %bb.0: ; %entry 1374; GFX10-WGP-NEXT: s_clause 0x1 1375; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1376; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1377; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1378; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1379; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1380; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1381; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1382; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1383; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1384; GFX10-WGP-NEXT: s_endpgm 1385; 1386; GFX10-CU-LABEL: global_system_release_monotonic_cmpxchg: 1387; GFX10-CU: ; %bb.0: ; %entry 1388; GFX10-CU-NEXT: s_clause 0x1 1389; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1390; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1391; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1392; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1393; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1394; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1395; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1396; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1397; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1398; GFX10-CU-NEXT: s_endpgm 1399; 1400; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg: 1401; SKIP-CACHE-INV: ; %bb.0: ; %entry 1402; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1403; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1404; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1405; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1406; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1407; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1408; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1409; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1410; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1411; SKIP-CACHE-INV-NEXT: s_endpgm 1412 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1413entry: 1414 %gep = getelementptr i32, i32 addrspace(1)* 
%out, i32 4 1415 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release monotonic 1416 ret void 1417} 1418 1419define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( 1420; GFX6-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1421; GFX6: ; %bb.0: ; %entry 1422; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1423; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1424; GFX6-NEXT: s_mov_b32 s7, 0xf000 1425; GFX6-NEXT: s_mov_b32 s6, -1 1426; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1427; GFX6-NEXT: v_mov_b32_e32 v0, s0 1428; GFX6-NEXT: v_mov_b32_e32 v1, s1 1429; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1430; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1431; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1432; GFX6-NEXT: buffer_wbinvl1 1433; GFX6-NEXT: s_endpgm 1434; 1435; GFX7-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1436; GFX7: ; %bb.0: ; %entry 1437; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1438; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1439; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1440; GFX7-NEXT: s_add_u32 s0, s0, 16 1441; GFX7-NEXT: s_addc_u32 s1, s1, 0 1442; GFX7-NEXT: v_mov_b32_e32 v0, s0 1443; GFX7-NEXT: v_mov_b32_e32 v2, s2 1444; GFX7-NEXT: v_mov_b32_e32 v1, s1 1445; GFX7-NEXT: v_mov_b32_e32 v3, s3 1446; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1447; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1448; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1449; GFX7-NEXT: buffer_wbinvl1_vol 1450; GFX7-NEXT: s_endpgm 1451; 1452; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1453; GFX10-WGP: ; %bb.0: ; %entry 1454; GFX10-WGP-NEXT: s_clause 0x1 1455; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1456; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1457; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1458; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1459; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1460; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1461; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1462; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 
1463; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1464; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1465; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1466; GFX10-WGP-NEXT: buffer_gl0_inv 1467; GFX10-WGP-NEXT: buffer_gl1_inv 1468; GFX10-WGP-NEXT: s_endpgm 1469; 1470; GFX10-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1471; GFX10-CU: ; %bb.0: ; %entry 1472; GFX10-CU-NEXT: s_clause 0x1 1473; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1474; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1475; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1476; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1477; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1478; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1479; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1480; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1481; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1482; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1483; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1484; GFX10-CU-NEXT: buffer_gl0_inv 1485; GFX10-CU-NEXT: buffer_gl1_inv 1486; GFX10-CU-NEXT: s_endpgm 1487; 1488; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg: 1489; SKIP-CACHE-INV: ; %bb.0: ; %entry 1490; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1491; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1492; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1493; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1494; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1495; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1496; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1497; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1498; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1499; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1500; SKIP-CACHE-INV-NEXT: s_endpgm 1501 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1502entry: 1503 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1504 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel monotonic 1505 ret void 1506} 1507 1508define 
amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( 1509; GFX6-LABEL: global_system_seq_cst_monotonic_cmpxchg: 1510; GFX6: ; %bb.0: ; %entry 1511; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1512; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1513; GFX6-NEXT: s_mov_b32 s7, 0xf000 1514; GFX6-NEXT: s_mov_b32 s6, -1 1515; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1516; GFX6-NEXT: v_mov_b32_e32 v0, s0 1517; GFX6-NEXT: v_mov_b32_e32 v1, s1 1518; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1519; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1520; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1521; GFX6-NEXT: buffer_wbinvl1 1522; GFX6-NEXT: s_endpgm 1523; 1524; GFX7-LABEL: global_system_seq_cst_monotonic_cmpxchg: 1525; GFX7: ; %bb.0: ; %entry 1526; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1527; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1528; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1529; GFX7-NEXT: s_add_u32 s0, s0, 16 1530; GFX7-NEXT: s_addc_u32 s1, s1, 0 1531; GFX7-NEXT: v_mov_b32_e32 v0, s0 1532; GFX7-NEXT: v_mov_b32_e32 v2, s2 1533; GFX7-NEXT: v_mov_b32_e32 v1, s1 1534; GFX7-NEXT: v_mov_b32_e32 v3, s3 1535; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1536; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1537; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1538; GFX7-NEXT: buffer_wbinvl1_vol 1539; GFX7-NEXT: s_endpgm 1540; 1541; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg: 1542; GFX10-WGP: ; %bb.0: ; %entry 1543; GFX10-WGP-NEXT: s_clause 0x1 1544; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1545; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1546; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1547; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1548; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1549; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1550; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1551; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1552; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1553; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1554; GFX10-WGP-NEXT: 
s_waitcnt_vscnt null, 0x0 1555; GFX10-WGP-NEXT: buffer_gl0_inv 1556; GFX10-WGP-NEXT: buffer_gl1_inv 1557; GFX10-WGP-NEXT: s_endpgm 1558; 1559; GFX10-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg: 1560; GFX10-CU: ; %bb.0: ; %entry 1561; GFX10-CU-NEXT: s_clause 0x1 1562; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1563; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1564; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1565; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1566; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1567; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1568; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1569; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1570; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1571; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1572; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1573; GFX10-CU-NEXT: buffer_gl0_inv 1574; GFX10-CU-NEXT: buffer_gl1_inv 1575; GFX10-CU-NEXT: s_endpgm 1576; 1577; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg: 1578; SKIP-CACHE-INV: ; %bb.0: ; %entry 1579; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1580; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1581; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1582; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1583; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1584; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1585; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1586; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1587; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1588; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1589; SKIP-CACHE-INV-NEXT: s_endpgm 1590 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1591entry: 1592 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1593 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst monotonic 1594 ret void 1595} 1596 1597define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( 1598; GFX6-LABEL: global_system_acquire_acquire_cmpxchg: 1599; GFX6: ; %bb.0: 
; %entry 1600; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1601; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1602; GFX6-NEXT: s_mov_b32 s7, 0xf000 1603; GFX6-NEXT: s_mov_b32 s6, -1 1604; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1605; GFX6-NEXT: v_mov_b32_e32 v0, s0 1606; GFX6-NEXT: v_mov_b32_e32 v1, s1 1607; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1608; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1609; GFX6-NEXT: buffer_wbinvl1 1610; GFX6-NEXT: s_endpgm 1611; 1612; GFX7-LABEL: global_system_acquire_acquire_cmpxchg: 1613; GFX7: ; %bb.0: ; %entry 1614; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1615; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1616; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1617; GFX7-NEXT: s_add_u32 s0, s0, 16 1618; GFX7-NEXT: s_addc_u32 s1, s1, 0 1619; GFX7-NEXT: v_mov_b32_e32 v0, s0 1620; GFX7-NEXT: v_mov_b32_e32 v2, s2 1621; GFX7-NEXT: v_mov_b32_e32 v1, s1 1622; GFX7-NEXT: v_mov_b32_e32 v3, s3 1623; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1624; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1625; GFX7-NEXT: buffer_wbinvl1_vol 1626; GFX7-NEXT: s_endpgm 1627; 1628; GFX10-WGP-LABEL: global_system_acquire_acquire_cmpxchg: 1629; GFX10-WGP: ; %bb.0: ; %entry 1630; GFX10-WGP-NEXT: s_clause 0x1 1631; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1632; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1633; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1634; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1635; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1636; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1637; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1638; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1639; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1640; GFX10-WGP-NEXT: buffer_gl0_inv 1641; GFX10-WGP-NEXT: buffer_gl1_inv 1642; GFX10-WGP-NEXT: s_endpgm 1643; 1644; GFX10-CU-LABEL: global_system_acquire_acquire_cmpxchg: 1645; GFX10-CU: ; %bb.0: ; %entry 1646; GFX10-CU-NEXT: s_clause 0x1 1647; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1648; 
GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1649; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1650; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1651; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1652; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1653; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1654; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1655; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1656; GFX10-CU-NEXT: buffer_gl0_inv 1657; GFX10-CU-NEXT: buffer_gl1_inv 1658; GFX10-CU-NEXT: s_endpgm 1659; 1660; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg: 1661; SKIP-CACHE-INV: ; %bb.0: ; %entry 1662; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1663; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1664; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1665; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1666; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1667; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1668; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1669; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1670; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1671; SKIP-CACHE-INV-NEXT: s_endpgm 1672 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1673entry: 1674 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1675 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire acquire 1676 ret void 1677} 1678 1679define amdgpu_kernel void @global_system_release_acquire_cmpxchg( 1680; GFX6-LABEL: global_system_release_acquire_cmpxchg: 1681; GFX6: ; %bb.0: ; %entry 1682; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1683; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1684; GFX6-NEXT: s_mov_b32 s7, 0xf000 1685; GFX6-NEXT: s_mov_b32 s6, -1 1686; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1687; GFX6-NEXT: v_mov_b32_e32 v0, s0 1688; GFX6-NEXT: v_mov_b32_e32 v1, s1 1689; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1690; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1691; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1692; GFX6-NEXT: 
buffer_wbinvl1 1693; GFX6-NEXT: s_endpgm 1694; 1695; GFX7-LABEL: global_system_release_acquire_cmpxchg: 1696; GFX7: ; %bb.0: ; %entry 1697; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1698; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1699; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1700; GFX7-NEXT: s_add_u32 s0, s0, 16 1701; GFX7-NEXT: s_addc_u32 s1, s1, 0 1702; GFX7-NEXT: v_mov_b32_e32 v0, s0 1703; GFX7-NEXT: v_mov_b32_e32 v2, s2 1704; GFX7-NEXT: v_mov_b32_e32 v1, s1 1705; GFX7-NEXT: v_mov_b32_e32 v3, s3 1706; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1707; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1708; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1709; GFX7-NEXT: buffer_wbinvl1_vol 1710; GFX7-NEXT: s_endpgm 1711; 1712; GFX10-WGP-LABEL: global_system_release_acquire_cmpxchg: 1713; GFX10-WGP: ; %bb.0: ; %entry 1714; GFX10-WGP-NEXT: s_clause 0x1 1715; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1716; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1717; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1718; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1719; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1720; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1721; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1722; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1723; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1724; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1725; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1726; GFX10-WGP-NEXT: buffer_gl0_inv 1727; GFX10-WGP-NEXT: buffer_gl1_inv 1728; GFX10-WGP-NEXT: s_endpgm 1729; 1730; GFX10-CU-LABEL: global_system_release_acquire_cmpxchg: 1731; GFX10-CU: ; %bb.0: ; %entry 1732; GFX10-CU-NEXT: s_clause 0x1 1733; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1734; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1735; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1736; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1737; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1738; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1739; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1740; GFX10-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 1741; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1742; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1743; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1744; GFX10-CU-NEXT: buffer_gl0_inv 1745; GFX10-CU-NEXT: buffer_gl1_inv 1746; GFX10-CU-NEXT: s_endpgm 1747; 1748; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg: 1749; SKIP-CACHE-INV: ; %bb.0: ; %entry 1750; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1751; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1752; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1753; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1754; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1755; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1756; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1757; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1758; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1759; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1760; SKIP-CACHE-INV-NEXT: s_endpgm 1761 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1762entry: 1763 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1764 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release acquire 1765 ret void 1766} 1767 1768define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( 1769; GFX6-LABEL: global_system_acq_rel_acquire_cmpxchg: 1770; GFX6: ; %bb.0: ; %entry 1771; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1772; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1773; GFX6-NEXT: s_mov_b32 s7, 0xf000 1774; GFX6-NEXT: s_mov_b32 s6, -1 1775; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1776; GFX6-NEXT: v_mov_b32_e32 v0, s0 1777; GFX6-NEXT: v_mov_b32_e32 v1, s1 1778; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1779; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1780; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1781; GFX6-NEXT: buffer_wbinvl1 1782; GFX6-NEXT: s_endpgm 1783; 1784; GFX7-LABEL: global_system_acq_rel_acquire_cmpxchg: 1785; GFX7: ; %bb.0: ; %entry 1786; 
GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1787; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1788; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1789; GFX7-NEXT: s_add_u32 s0, s0, 16 1790; GFX7-NEXT: s_addc_u32 s1, s1, 0 1791; GFX7-NEXT: v_mov_b32_e32 v0, s0 1792; GFX7-NEXT: v_mov_b32_e32 v2, s2 1793; GFX7-NEXT: v_mov_b32_e32 v1, s1 1794; GFX7-NEXT: v_mov_b32_e32 v3, s3 1795; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1796; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1797; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1798; GFX7-NEXT: buffer_wbinvl1_vol 1799; GFX7-NEXT: s_endpgm 1800; 1801; GFX10-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg: 1802; GFX10-WGP: ; %bb.0: ; %entry 1803; GFX10-WGP-NEXT: s_clause 0x1 1804; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1805; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1806; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1807; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1808; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1809; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1810; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1811; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1812; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1813; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1814; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1815; GFX10-WGP-NEXT: buffer_gl0_inv 1816; GFX10-WGP-NEXT: buffer_gl1_inv 1817; GFX10-WGP-NEXT: s_endpgm 1818; 1819; GFX10-CU-LABEL: global_system_acq_rel_acquire_cmpxchg: 1820; GFX10-CU: ; %bb.0: ; %entry 1821; GFX10-CU-NEXT: s_clause 0x1 1822; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1823; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1824; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1825; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1826; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1827; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1828; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1829; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1830; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1831; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1832; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1833; GFX10-CU-NEXT: buffer_gl0_inv 1834; GFX10-CU-NEXT: buffer_gl1_inv 1835; GFX10-CU-NEXT: s_endpgm 1836; 1837; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg: 1838; SKIP-CACHE-INV: ; %bb.0: ; %entry 1839; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1840; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1841; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1842; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1843; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1844; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1845; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1846; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1847; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1848; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1849; SKIP-CACHE-INV-NEXT: s_endpgm 1850 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1851entry: 1852 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1853 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel acquire 1854 ret void 1855} 1856 1857define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( 1858; GFX6-LABEL: global_system_seq_cst_acquire_cmpxchg: 1859; GFX6: ; %bb.0: ; %entry 1860; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1861; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1862; GFX6-NEXT: s_mov_b32 s7, 0xf000 1863; GFX6-NEXT: s_mov_b32 s6, -1 1864; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1865; GFX6-NEXT: v_mov_b32_e32 v0, s0 1866; GFX6-NEXT: v_mov_b32_e32 v1, s1 1867; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1868; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1869; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1870; GFX6-NEXT: buffer_wbinvl1 1871; GFX6-NEXT: s_endpgm 1872; 1873; GFX7-LABEL: global_system_seq_cst_acquire_cmpxchg: 1874; GFX7: ; %bb.0: ; %entry 1875; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1876; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1877; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1878; 
GFX7-NEXT: s_add_u32 s0, s0, 16 1879; GFX7-NEXT: s_addc_u32 s1, s1, 0 1880; GFX7-NEXT: v_mov_b32_e32 v0, s0 1881; GFX7-NEXT: v_mov_b32_e32 v2, s2 1882; GFX7-NEXT: v_mov_b32_e32 v1, s1 1883; GFX7-NEXT: v_mov_b32_e32 v3, s3 1884; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1885; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1886; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1887; GFX7-NEXT: buffer_wbinvl1_vol 1888; GFX7-NEXT: s_endpgm 1889; 1890; GFX10-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg: 1891; GFX10-WGP: ; %bb.0: ; %entry 1892; GFX10-WGP-NEXT: s_clause 0x1 1893; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1894; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1895; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1896; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1897; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1898; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1899; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1900; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1901; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1902; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1903; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1904; GFX10-WGP-NEXT: buffer_gl0_inv 1905; GFX10-WGP-NEXT: buffer_gl1_inv 1906; GFX10-WGP-NEXT: s_endpgm 1907; 1908; GFX10-CU-LABEL: global_system_seq_cst_acquire_cmpxchg: 1909; GFX10-CU: ; %bb.0: ; %entry 1910; GFX10-CU-NEXT: s_clause 0x1 1911; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1912; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1913; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 1914; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1915; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1916; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1917; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1918; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1919; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1920; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1921; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1922; GFX10-CU-NEXT: buffer_gl0_inv 1923; GFX10-CU-NEXT: buffer_gl1_inv 1924; GFX10-CU-NEXT: s_endpgm 
1925; 1926; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg: 1927; SKIP-CACHE-INV: ; %bb.0: ; %entry 1928; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1929; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1930; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 1931; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 1932; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1933; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1934; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1935; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1936; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1937; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1938; SKIP-CACHE-INV-NEXT: s_endpgm 1939 i32 addrspace(1)* %out, i32 %in, i32 %old) { 1940entry: 1941 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 1942 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst acquire 1943 ret void 1944} 1945 1946define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( 1947; GFX6-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 1948; GFX6: ; %bb.0: ; %entry 1949; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1950; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 1951; GFX6-NEXT: s_mov_b32 s7, 0xf000 1952; GFX6-NEXT: s_mov_b32 s6, -1 1953; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1954; GFX6-NEXT: v_mov_b32_e32 v0, s0 1955; GFX6-NEXT: v_mov_b32_e32 v1, s1 1956; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1957; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 1958; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1959; GFX6-NEXT: buffer_wbinvl1 1960; GFX6-NEXT: s_endpgm 1961; 1962; GFX7-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 1963; GFX7: ; %bb.0: ; %entry 1964; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1965; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 1966; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1967; GFX7-NEXT: s_add_u32 s0, s0, 16 1968; GFX7-NEXT: s_addc_u32 s1, s1, 0 1969; GFX7-NEXT: v_mov_b32_e32 v0, s0 1970; GFX7-NEXT: v_mov_b32_e32 v2, s2 
1971; GFX7-NEXT: v_mov_b32_e32 v1, s1 1972; GFX7-NEXT: v_mov_b32_e32 v3, s3 1973; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1974; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 1975; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1976; GFX7-NEXT: buffer_wbinvl1_vol 1977; GFX7-NEXT: s_endpgm 1978; 1979; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 1980; GFX10-WGP: ; %bb.0: ; %entry 1981; GFX10-WGP-NEXT: s_clause 0x1 1982; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 1983; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 1984; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 1985; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1986; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1987; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1988; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1989; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1990; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 1991; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1992; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1993; GFX10-WGP-NEXT: buffer_gl0_inv 1994; GFX10-WGP-NEXT: buffer_gl1_inv 1995; GFX10-WGP-NEXT: s_endpgm 1996; 1997; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 1998; GFX10-CU: ; %bb.0: ; %entry 1999; GFX10-CU-NEXT: s_clause 0x1 2000; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2001; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2002; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2003; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2004; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2005; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2006; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2007; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2008; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 2009; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2010; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2011; GFX10-CU-NEXT: buffer_gl0_inv 2012; GFX10-CU-NEXT: buffer_gl1_inv 2013; GFX10-CU-NEXT: s_endpgm 2014; 2015; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg: 2016; SKIP-CACHE-INV: ; %bb.0: ; %entry 2017; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x9 2018; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2019; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2020; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2021; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2022; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2023; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2024; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2025; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 2026; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2027; SKIP-CACHE-INV-NEXT: s_endpgm 2028 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2029entry: 2030 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2031 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 2032 ret void 2033} 2034 2035define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( 2036; GFX6-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2037; GFX6: ; %bb.0: ; %entry 2038; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2039; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2040; GFX6-NEXT: s_mov_b32 s7, 0xf000 2041; GFX6-NEXT: s_mov_b32 s6, -1 2042; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2043; GFX6-NEXT: v_mov_b32_e32 v0, s0 2044; GFX6-NEXT: v_mov_b32_e32 v1, s1 2045; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2046; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2047; GFX6-NEXT: buffer_wbinvl1 2048; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2049; GFX6-NEXT: s_endpgm 2050; 2051; GFX7-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2052; GFX7: ; %bb.0: ; %entry 2053; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2054; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2055; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2056; GFX7-NEXT: s_add_u32 s4, s0, 16 2057; GFX7-NEXT: s_addc_u32 s5, s1, 0 2058; GFX7-NEXT: v_mov_b32_e32 v0, s4 2059; GFX7-NEXT: v_mov_b32_e32 v2, s2 2060; GFX7-NEXT: v_mov_b32_e32 v1, s5 2061; GFX7-NEXT: v_mov_b32_e32 v3, s3 2062; GFX7-NEXT: flat_atomic_cmpswap 
v2, v[0:1], v[2:3] glc 2063; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2064; GFX7-NEXT: buffer_wbinvl1_vol 2065; GFX7-NEXT: v_mov_b32_e32 v0, s0 2066; GFX7-NEXT: v_mov_b32_e32 v1, s1 2067; GFX7-NEXT: flat_store_dword v[0:1], v2 2068; GFX7-NEXT: s_endpgm 2069; 2070; GFX10-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2071; GFX10-WGP: ; %bb.0: ; %entry 2072; GFX10-WGP-NEXT: s_clause 0x1 2073; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2074; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2075; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2076; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2077; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2078; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2079; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2080; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2081; GFX10-WGP-NEXT: buffer_gl0_inv 2082; GFX10-WGP-NEXT: buffer_gl1_inv 2083; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2084; GFX10-WGP-NEXT: s_endpgm 2085; 2086; GFX10-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2087; GFX10-CU: ; %bb.0: ; %entry 2088; GFX10-CU-NEXT: s_clause 0x1 2089; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2090; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2091; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2092; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2093; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2094; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2095; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2096; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2097; GFX10-CU-NEXT: buffer_gl0_inv 2098; GFX10-CU-NEXT: buffer_gl1_inv 2099; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2100; GFX10-CU-NEXT: s_endpgm 2101; 2102; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg: 2103; SKIP-CACHE-INV: ; %bb.0: ; %entry 2104; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2105; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2106; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2107; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2108; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2109; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2110; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2111; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2112; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2113; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2114; SKIP-CACHE-INV-NEXT: s_endpgm 2115 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2116entry: 2117 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2118 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire monotonic 2119 %val0 = extractvalue { i32, i1 } %val, 0 2120 store i32 %val0, i32 addrspace(1)* %out, align 4 2121 ret void 2122} 2123 2124define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( 2125; GFX6-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2126; GFX6: ; %bb.0: ; %entry 2127; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2128; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2129; GFX6-NEXT: s_mov_b32 s7, 0xf000 2130; GFX6-NEXT: s_mov_b32 s6, -1 2131; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2132; GFX6-NEXT: v_mov_b32_e32 v0, s0 2133; GFX6-NEXT: v_mov_b32_e32 v1, s1 2134; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2135; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2136; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2137; GFX6-NEXT: buffer_wbinvl1 2138; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2139; GFX6-NEXT: s_endpgm 2140; 2141; GFX7-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2142; GFX7: ; %bb.0: ; %entry 2143; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2144; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2145; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2146; GFX7-NEXT: s_add_u32 s4, s0, 16 2147; GFX7-NEXT: s_addc_u32 s5, s1, 0 2148; GFX7-NEXT: v_mov_b32_e32 v0, s4 2149; GFX7-NEXT: v_mov_b32_e32 v2, s2 2150; GFX7-NEXT: v_mov_b32_e32 v1, s5 2151; GFX7-NEXT: v_mov_b32_e32 v3, s3 2152; GFX7-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 2153; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2154; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2155; GFX7-NEXT: buffer_wbinvl1_vol 2156; GFX7-NEXT: v_mov_b32_e32 v0, s0 2157; GFX7-NEXT: v_mov_b32_e32 v1, s1 2158; GFX7-NEXT: flat_store_dword v[0:1], v2 2159; GFX7-NEXT: s_endpgm 2160; 2161; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2162; GFX10-WGP: ; %bb.0: ; %entry 2163; GFX10-WGP-NEXT: s_clause 0x1 2164; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2165; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2166; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2167; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2168; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2169; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2170; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2171; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2172; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2173; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2174; GFX10-WGP-NEXT: buffer_gl0_inv 2175; GFX10-WGP-NEXT: buffer_gl1_inv 2176; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2177; GFX10-WGP-NEXT: s_endpgm 2178; 2179; GFX10-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: 2180; GFX10-CU: ; %bb.0: ; %entry 2181; GFX10-CU-NEXT: s_clause 0x1 2182; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2183; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2184; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2185; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2186; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2187; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2188; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2189; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2190; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2191; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2192; GFX10-CU-NEXT: buffer_gl0_inv 2193; GFX10-CU-NEXT: buffer_gl1_inv 2194; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2195; GFX10-CU-NEXT: s_endpgm 2196; 2197; SKIP-CACHE-INV-LABEL: 
global_system_acq_rel_monotonic_ret_cmpxchg: 2198; SKIP-CACHE-INV: ; %bb.0: ; %entry 2199; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2200; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2201; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2202; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2203; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2204; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2205; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2206; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2207; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2208; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2209; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2210; SKIP-CACHE-INV-NEXT: s_endpgm 2211 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2212entry: 2213 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2214 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel monotonic 2215 %val0 = extractvalue { i32, i1 } %val, 0 2216 store i32 %val0, i32 addrspace(1)* %out, align 4 2217 ret void 2218} 2219 2220define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( 2221; GFX6-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2222; GFX6: ; %bb.0: ; %entry 2223; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2224; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2225; GFX6-NEXT: s_mov_b32 s7, 0xf000 2226; GFX6-NEXT: s_mov_b32 s6, -1 2227; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2228; GFX6-NEXT: v_mov_b32_e32 v0, s0 2229; GFX6-NEXT: v_mov_b32_e32 v1, s1 2230; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2231; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2232; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2233; GFX6-NEXT: buffer_wbinvl1 2234; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2235; GFX6-NEXT: s_endpgm 2236; 2237; GFX7-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2238; GFX7: ; %bb.0: ; %entry 2239; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2240; GFX7-NEXT: 
s_load_dwordx2 s[2:3], s[4:5], 0x2 2241; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2242; GFX7-NEXT: s_add_u32 s4, s0, 16 2243; GFX7-NEXT: s_addc_u32 s5, s1, 0 2244; GFX7-NEXT: v_mov_b32_e32 v0, s4 2245; GFX7-NEXT: v_mov_b32_e32 v2, s2 2246; GFX7-NEXT: v_mov_b32_e32 v1, s5 2247; GFX7-NEXT: v_mov_b32_e32 v3, s3 2248; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2249; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2250; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2251; GFX7-NEXT: buffer_wbinvl1_vol 2252; GFX7-NEXT: v_mov_b32_e32 v0, s0 2253; GFX7-NEXT: v_mov_b32_e32 v1, s1 2254; GFX7-NEXT: flat_store_dword v[0:1], v2 2255; GFX7-NEXT: s_endpgm 2256; 2257; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2258; GFX10-WGP: ; %bb.0: ; %entry 2259; GFX10-WGP-NEXT: s_clause 0x1 2260; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2261; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2262; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2263; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2264; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2265; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2266; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2267; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2268; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2269; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2270; GFX10-WGP-NEXT: buffer_gl0_inv 2271; GFX10-WGP-NEXT: buffer_gl1_inv 2272; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2273; GFX10-WGP-NEXT: s_endpgm 2274; 2275; GFX10-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2276; GFX10-CU: ; %bb.0: ; %entry 2277; GFX10-CU-NEXT: s_clause 0x1 2278; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2279; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2280; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2281; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2282; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2283; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2284; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2285; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2286; GFX10-CU-NEXT: 
global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2287; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2288; GFX10-CU-NEXT: buffer_gl0_inv 2289; GFX10-CU-NEXT: buffer_gl1_inv 2290; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2291; GFX10-CU-NEXT: s_endpgm 2292; 2293; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: 2294; SKIP-CACHE-INV: ; %bb.0: ; %entry 2295; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2296; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2297; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2298; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2299; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2300; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2301; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2302; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2303; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2304; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2305; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2306; SKIP-CACHE-INV-NEXT: s_endpgm 2307 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2308entry: 2309 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2310 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst monotonic 2311 %val0 = extractvalue { i32, i1 } %val, 0 2312 store i32 %val0, i32 addrspace(1)* %out, align 4 2313 ret void 2314} 2315 2316define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( 2317; GFX6-LABEL: global_system_acquire_acquire_ret_cmpxchg: 2318; GFX6: ; %bb.0: ; %entry 2319; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2320; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2321; GFX6-NEXT: s_mov_b32 s7, 0xf000 2322; GFX6-NEXT: s_mov_b32 s6, -1 2323; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2324; GFX6-NEXT: v_mov_b32_e32 v0, s0 2325; GFX6-NEXT: v_mov_b32_e32 v1, s1 2326; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2327; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2328; GFX6-NEXT: buffer_wbinvl1 2329; 
GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2330; GFX6-NEXT: s_endpgm 2331; 2332; GFX7-LABEL: global_system_acquire_acquire_ret_cmpxchg: 2333; GFX7: ; %bb.0: ; %entry 2334; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2335; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2336; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2337; GFX7-NEXT: s_add_u32 s4, s0, 16 2338; GFX7-NEXT: s_addc_u32 s5, s1, 0 2339; GFX7-NEXT: v_mov_b32_e32 v0, s4 2340; GFX7-NEXT: v_mov_b32_e32 v2, s2 2341; GFX7-NEXT: v_mov_b32_e32 v1, s5 2342; GFX7-NEXT: v_mov_b32_e32 v3, s3 2343; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2344; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2345; GFX7-NEXT: buffer_wbinvl1_vol 2346; GFX7-NEXT: v_mov_b32_e32 v0, s0 2347; GFX7-NEXT: v_mov_b32_e32 v1, s1 2348; GFX7-NEXT: flat_store_dword v[0:1], v2 2349; GFX7-NEXT: s_endpgm 2350; 2351; GFX10-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg: 2352; GFX10-WGP: ; %bb.0: ; %entry 2353; GFX10-WGP-NEXT: s_clause 0x1 2354; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2355; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2356; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2357; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2358; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2359; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2360; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2361; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2362; GFX10-WGP-NEXT: buffer_gl0_inv 2363; GFX10-WGP-NEXT: buffer_gl1_inv 2364; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2365; GFX10-WGP-NEXT: s_endpgm 2366; 2367; GFX10-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg: 2368; GFX10-CU: ; %bb.0: ; %entry 2369; GFX10-CU-NEXT: s_clause 0x1 2370; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2371; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2372; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2373; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2374; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2375; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2376; GFX10-CU-NEXT: 
global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2377; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2378; GFX10-CU-NEXT: buffer_gl0_inv 2379; GFX10-CU-NEXT: buffer_gl1_inv 2380; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2381; GFX10-CU-NEXT: s_endpgm 2382; 2383; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg: 2384; SKIP-CACHE-INV: ; %bb.0: ; %entry 2385; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2386; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2387; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2388; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2389; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2390; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2391; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2392; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2393; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2394; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2395; SKIP-CACHE-INV-NEXT: s_endpgm 2396 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2397entry: 2398 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2399 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acquire acquire 2400 %val0 = extractvalue { i32, i1 } %val, 0 2401 store i32 %val0, i32 addrspace(1)* %out, align 4 2402 ret void 2403} 2404 ; Test intent, per the IR at the end of this function: a volatile cmpxchg (success ordering release, failure ordering acquire, no syncscope) on the i32 four elements past %out in addrspace(1); field 0 of the result is then stored to %out. Expected ISA is pinned per check prefix (GFX6, GFX7, GFX10-WGP, GFX10-CU, SKIP-CACHE-INV). 2405define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( 2406; GFX6-LABEL: global_system_release_acquire_ret_cmpxchg: 2407; GFX6: ; %bb.0: ; %entry 2408; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2409; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2410; GFX6-NEXT: s_mov_b32 s7, 0xf000 2411; GFX6-NEXT: s_mov_b32 s6, -1 2412; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2413; GFX6-NEXT: v_mov_b32_e32 v0, s0 2414; GFX6-NEXT: v_mov_b32_e32 v1, s1 2415; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2416; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2417; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2418; GFX6-NEXT: buffer_wbinvl1 2419; GFX6-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 2420; GFX6-NEXT: s_endpgm 2421; 2422; GFX7-LABEL: global_system_release_acquire_ret_cmpxchg: 2423; GFX7: ; %bb.0: ; %entry 2424; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2425; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2426; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2427; GFX7-NEXT: s_add_u32 s4, s0, 16 2428; GFX7-NEXT: s_addc_u32 s5, s1, 0 2429; GFX7-NEXT: v_mov_b32_e32 v0, s4 2430; GFX7-NEXT: v_mov_b32_e32 v2, s2 2431; GFX7-NEXT: v_mov_b32_e32 v1, s5 2432; GFX7-NEXT: v_mov_b32_e32 v3, s3 2433; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2434; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2435; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2436; GFX7-NEXT: buffer_wbinvl1_vol 2437; GFX7-NEXT: v_mov_b32_e32 v0, s0 2438; GFX7-NEXT: v_mov_b32_e32 v1, s1 2439; GFX7-NEXT: flat_store_dword v[0:1], v2 2440; GFX7-NEXT: s_endpgm 2441; 2442; GFX10-WGP-LABEL: global_system_release_acquire_ret_cmpxchg: 2443; GFX10-WGP: ; %bb.0: ; %entry 2444; GFX10-WGP-NEXT: s_clause 0x1 2445; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2446; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2447; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2448; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2449; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2450; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2451; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2452; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2453; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2454; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2455; GFX10-WGP-NEXT: buffer_gl0_inv 2456; GFX10-WGP-NEXT: buffer_gl1_inv 2457; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2458; GFX10-WGP-NEXT: s_endpgm 2459; 2460; GFX10-CU-LABEL: global_system_release_acquire_ret_cmpxchg: 2461; GFX10-CU: ; %bb.0: ; %entry 2462; GFX10-CU-NEXT: s_clause 0x1 2463; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2464; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2465; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2466; 
GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2467; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2468; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2469; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2470; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2471; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2472; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2473; GFX10-CU-NEXT: buffer_gl0_inv 2474; GFX10-CU-NEXT: buffer_gl1_inv 2475; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2476; GFX10-CU-NEXT: s_endpgm 2477; 2478; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg: 2479; SKIP-CACHE-INV: ; %bb.0: ; %entry 2480; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2481; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2482; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2483; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2484; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2485; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2486; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2487; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2488; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2489; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2490; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2491; SKIP-CACHE-INV-NEXT: s_endpgm 2492 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2493entry: 2494 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2495 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in release acquire 2496 %val0 = extractvalue { i32, i1 } %val, 0 2497 store i32 %val0, i32 addrspace(1)* %out, align 4 2498 ret void 2499} 2500 ; Test intent, per the IR at the end of this function: a volatile cmpxchg (success ordering acq_rel, failure ordering acquire, no syncscope) on the i32 four elements past %out in addrspace(1); field 0 of the result is then stored to %out. 2501define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( 2502; GFX6-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 2503; GFX6: ; %bb.0: ; %entry 2504; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2505; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2506; GFX6-NEXT: s_mov_b32 s7, 0xf000 2507; GFX6-NEXT: s_mov_b32 s6, -1 2508; GFX6-NEXT: s_waitcnt lgkmcnt(0) 
2509; GFX6-NEXT: v_mov_b32_e32 v0, s0 2510; GFX6-NEXT: v_mov_b32_e32 v1, s1 2511; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2512; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2513; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2514; GFX6-NEXT: buffer_wbinvl1 2515; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2516; GFX6-NEXT: s_endpgm 2517; 2518; GFX7-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 2519; GFX7: ; %bb.0: ; %entry 2520; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2521; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2522; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2523; GFX7-NEXT: s_add_u32 s4, s0, 16 2524; GFX7-NEXT: s_addc_u32 s5, s1, 0 2525; GFX7-NEXT: v_mov_b32_e32 v0, s4 2526; GFX7-NEXT: v_mov_b32_e32 v2, s2 2527; GFX7-NEXT: v_mov_b32_e32 v1, s5 2528; GFX7-NEXT: v_mov_b32_e32 v3, s3 2529; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2530; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2531; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2532; GFX7-NEXT: buffer_wbinvl1_vol 2533; GFX7-NEXT: v_mov_b32_e32 v0, s0 2534; GFX7-NEXT: v_mov_b32_e32 v1, s1 2535; GFX7-NEXT: flat_store_dword v[0:1], v2 2536; GFX7-NEXT: s_endpgm 2537; 2538; GFX10-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 2539; GFX10-WGP: ; %bb.0: ; %entry 2540; GFX10-WGP-NEXT: s_clause 0x1 2541; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2542; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2543; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2544; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2545; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2546; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2547; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2548; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2549; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2550; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2551; GFX10-WGP-NEXT: buffer_gl0_inv 2552; GFX10-WGP-NEXT: buffer_gl1_inv 2553; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2554; GFX10-WGP-NEXT: s_endpgm 2555; 2556; 
GFX10-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 2557; GFX10-CU: ; %bb.0: ; %entry 2558; GFX10-CU-NEXT: s_clause 0x1 2559; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2560; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2561; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2562; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2563; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2564; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2565; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2566; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2567; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2568; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2569; GFX10-CU-NEXT: buffer_gl0_inv 2570; GFX10-CU-NEXT: buffer_gl1_inv 2571; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2572; GFX10-CU-NEXT: s_endpgm 2573; 2574; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: 2575; SKIP-CACHE-INV: ; %bb.0: ; %entry 2576; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2577; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2578; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2579; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2580; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2581; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2582; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2583; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2584; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2585; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2586; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2587; SKIP-CACHE-INV-NEXT: s_endpgm 2588 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2589entry: 2590 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2591 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in acq_rel acquire 2592 %val0 = extractvalue { i32, i1 } %val, 0 2593 store i32 %val0, i32 addrspace(1)* %out, align 4 2594 ret void 2595} 2596 ; Test intent, per the IR at the end of this function: a volatile cmpxchg (success ordering seq_cst, failure ordering acquire, no syncscope) on the i32 four elements past %out in addrspace(1); field 0 of the result is then stored to %out. Expected ISA is pinned per check prefix (GFX6, GFX7, GFX10-WGP, GFX10-CU, SKIP-CACHE-INV). 2597define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( 2598; GFX6-LABEL: 
global_system_seq_cst_acquire_ret_cmpxchg: 2599; GFX6: ; %bb.0: ; %entry 2600; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2601; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2602; GFX6-NEXT: s_mov_b32 s7, 0xf000 2603; GFX6-NEXT: s_mov_b32 s6, -1 2604; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2605; GFX6-NEXT: v_mov_b32_e32 v0, s0 2606; GFX6-NEXT: v_mov_b32_e32 v1, s1 2607; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2608; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2609; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2610; GFX6-NEXT: buffer_wbinvl1 2611; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2612; GFX6-NEXT: s_endpgm 2613; 2614; GFX7-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 2615; GFX7: ; %bb.0: ; %entry 2616; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2617; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2618; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2619; GFX7-NEXT: s_add_u32 s4, s0, 16 2620; GFX7-NEXT: s_addc_u32 s5, s1, 0 2621; GFX7-NEXT: v_mov_b32_e32 v0, s4 2622; GFX7-NEXT: v_mov_b32_e32 v2, s2 2623; GFX7-NEXT: v_mov_b32_e32 v1, s5 2624; GFX7-NEXT: v_mov_b32_e32 v3, s3 2625; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2626; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2627; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2628; GFX7-NEXT: buffer_wbinvl1_vol 2629; GFX7-NEXT: v_mov_b32_e32 v0, s0 2630; GFX7-NEXT: v_mov_b32_e32 v1, s1 2631; GFX7-NEXT: flat_store_dword v[0:1], v2 2632; GFX7-NEXT: s_endpgm 2633; 2634; GFX10-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 2635; GFX10-WGP: ; %bb.0: ; %entry 2636; GFX10-WGP-NEXT: s_clause 0x1 2637; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2638; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2639; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2640; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2641; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2642; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2643; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2644; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2645; GFX10-WGP-NEXT: 
global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2646; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2647; GFX10-WGP-NEXT: buffer_gl0_inv 2648; GFX10-WGP-NEXT: buffer_gl1_inv 2649; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2650; GFX10-WGP-NEXT: s_endpgm 2651; 2652; GFX10-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 2653; GFX10-CU: ; %bb.0: ; %entry 2654; GFX10-CU-NEXT: s_clause 0x1 2655; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2656; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2657; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2658; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2659; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2660; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2661; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2662; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2663; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2664; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2665; GFX10-CU-NEXT: buffer_gl0_inv 2666; GFX10-CU-NEXT: buffer_gl1_inv 2667; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2668; GFX10-CU-NEXT: s_endpgm 2669; 2670; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: 2671; SKIP-CACHE-INV: ; %bb.0: ; %entry 2672; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2673; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2674; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2675; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2676; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2677; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2678; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2679; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2680; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2681; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2682; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2683; SKIP-CACHE-INV-NEXT: s_endpgm 2684 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2685entry: 2686 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2687 %val = cmpxchg 
volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst acquire 2688 %val0 = extractvalue { i32, i1 } %val, 0 2689 store i32 %val0, i32 addrspace(1)* %out, align 4 2690 ret void 2691} 2692 ; Test intent, per the IR at the end of this function: a volatile cmpxchg (success ordering seq_cst, failure ordering seq_cst, no syncscope) on the i32 four elements past %out in addrspace(1); field 0 of the result is then stored to %out. 2693define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( 2694; GFX6-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 2695; GFX6: ; %bb.0: ; %entry 2696; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2697; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2698; GFX6-NEXT: s_mov_b32 s7, 0xf000 2699; GFX6-NEXT: s_mov_b32 s6, -1 2700; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2701; GFX6-NEXT: v_mov_b32_e32 v0, s0 2702; GFX6-NEXT: v_mov_b32_e32 v1, s1 2703; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2704; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2705; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2706; GFX6-NEXT: buffer_wbinvl1 2707; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2708; GFX6-NEXT: s_endpgm 2709; 2710; GFX7-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 2711; GFX7: ; %bb.0: ; %entry 2712; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2713; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 2714; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2715; GFX7-NEXT: s_add_u32 s4, s0, 16 2716; GFX7-NEXT: s_addc_u32 s5, s1, 0 2717; GFX7-NEXT: v_mov_b32_e32 v0, s4 2718; GFX7-NEXT: v_mov_b32_e32 v2, s2 2719; GFX7-NEXT: v_mov_b32_e32 v1, s5 2720; GFX7-NEXT: v_mov_b32_e32 v3, s3 2721; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2722; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 2723; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2724; GFX7-NEXT: buffer_wbinvl1_vol 2725; GFX7-NEXT: v_mov_b32_e32 v0, s0 2726; GFX7-NEXT: v_mov_b32_e32 v1, s1 2727; GFX7-NEXT: flat_store_dword v[0:1], v2 2728; GFX7-NEXT: s_endpgm 2729; 2730; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 2731; GFX10-WGP: ; %bb.0: ; %entry 2732; GFX10-WGP-NEXT: s_clause 0x1 2733; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2734; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2735; 
GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 2736; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2737; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2738; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2739; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2740; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 2741; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2742; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2743; GFX10-WGP-NEXT: buffer_gl0_inv 2744; GFX10-WGP-NEXT: buffer_gl1_inv 2745; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 2746; GFX10-WGP-NEXT: s_endpgm 2747; 2748; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 2749; GFX10-CU: ; %bb.0: ; %entry 2750; GFX10-CU-NEXT: s_clause 0x1 2751; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 2752; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 2753; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 2754; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2755; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2756; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2757; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2758; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 2759; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 2760; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2761; GFX10-CU-NEXT: buffer_gl0_inv 2762; GFX10-CU-NEXT: buffer_gl1_inv 2763; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 2764; GFX10-CU-NEXT: s_endpgm 2765; 2766; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: 2767; SKIP-CACHE-INV: ; %bb.0: ; %entry 2768; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2769; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 2770; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 2771; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 2772; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2773; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2774; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2775; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2776; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 2777; 
SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2778; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2779; SKIP-CACHE-INV-NEXT: s_endpgm 2780 i32 addrspace(1)* %out, i32 %in, i32 %old) { 2781entry: 2782 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 2783 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 2784 %val0 = extractvalue { i32, i1 } %val, 0 2785 store i32 %val0, i32 addrspace(1)* %out, align 4 2786 ret void 2787} 2788 ; Test intent, per the IR at the end of this function: an atomic i32 load from %in (addrspace(1)) with syncscope("one-as") and unordered ordering, align 4; the loaded value is written to %out with a plain store. Expected ISA is pinned per check prefix (GFX6, GFX7, GFX10-WGP, GFX10-CU, SKIP-CACHE-INV). 2789define amdgpu_kernel void @global_system_one_as_unordered_load( 2790; GFX6-LABEL: global_system_one_as_unordered_load: 2791; GFX6: ; %bb.0: ; %entry 2792; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2793; GFX6-NEXT: s_mov_b32 s3, 0xf000 2794; GFX6-NEXT: s_mov_b32 s2, -1 2795; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2796; GFX6-NEXT: s_mov_b32 s0, s4 2797; GFX6-NEXT: s_mov_b32 s1, s5 2798; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 2799; GFX6-NEXT: s_mov_b32 s4, s6 2800; GFX6-NEXT: s_mov_b32 s5, s7 2801; GFX6-NEXT: s_mov_b32 s6, s2 2802; GFX6-NEXT: s_mov_b32 s7, s3 2803; GFX6-NEXT: s_waitcnt vmcnt(0) 2804; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2805; GFX6-NEXT: s_endpgm 2806; 2807; GFX7-LABEL: global_system_one_as_unordered_load: 2808; GFX7: ; %bb.0: ; %entry 2809; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2810; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2811; GFX7-NEXT: v_mov_b32_e32 v0, s0 2812; GFX7-NEXT: v_mov_b32_e32 v1, s1 2813; GFX7-NEXT: flat_load_dword v0, v[0:1] 2814; GFX7-NEXT: v_mov_b32_e32 v2, s2 2815; GFX7-NEXT: v_mov_b32_e32 v3, s3 2816; GFX7-NEXT: s_waitcnt vmcnt(0) 2817; GFX7-NEXT: flat_store_dword v[2:3], v0 2818; GFX7-NEXT: s_endpgm 2819; 2820; GFX10-WGP-LABEL: global_system_one_as_unordered_load: 2821; GFX10-WGP: ; %bb.0: ; %entry 2822; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2823; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2824; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2825; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] 2826; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 
2827; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 2828; GFX10-WGP-NEXT: s_endpgm 2829; 2830; GFX10-CU-LABEL: global_system_one_as_unordered_load: 2831; GFX10-CU: ; %bb.0: ; %entry 2832; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2833; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2834; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2835; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] 2836; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2837; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 2838; GFX10-CU-NEXT: s_endpgm 2839; 2840; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load: 2841; SKIP-CACHE-INV: ; %bb.0: ; %entry 2842; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2843; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 2844; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 2845; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2846; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 2847; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 2848; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 2849; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 2850; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 2851; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 2852; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 2853; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2854; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2855; SKIP-CACHE-INV-NEXT: s_endpgm 2856 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 2857entry: 2858 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") unordered, align 4 2859 store i32 %val, i32 addrspace(1)* %out 2860 ret void 2861} 2862 ; Test intent, per the IR at the end of this function: an atomic i32 load from %in (addrspace(1)) with syncscope("one-as") and monotonic ordering, align 4; the loaded value is written to %out with a plain store. 2863define amdgpu_kernel void @global_system_one_as_monotonic_load( 2864; GFX6-LABEL: global_system_one_as_monotonic_load: 2865; GFX6: ; %bb.0: ; %entry 2866; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2867; GFX6-NEXT: s_mov_b32 s3, 0xf000 2868; GFX6-NEXT: s_mov_b32 s2, -1 2869; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2870; GFX6-NEXT: s_mov_b32 s0, s4 2871; GFX6-NEXT: s_mov_b32 s1, s5 2872; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 2873; GFX6-NEXT: s_mov_b32 s4, s6 2874; 
GFX6-NEXT: s_mov_b32 s5, s7 2875; GFX6-NEXT: s_mov_b32 s6, s2 2876; GFX6-NEXT: s_mov_b32 s7, s3 2877; GFX6-NEXT: s_waitcnt vmcnt(0) 2878; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2879; GFX6-NEXT: s_endpgm 2880; 2881; GFX7-LABEL: global_system_one_as_monotonic_load: 2882; GFX7: ; %bb.0: ; %entry 2883; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2884; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2885; GFX7-NEXT: v_mov_b32_e32 v0, s0 2886; GFX7-NEXT: v_mov_b32_e32 v1, s1 2887; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 2888; GFX7-NEXT: v_mov_b32_e32 v2, s2 2889; GFX7-NEXT: v_mov_b32_e32 v3, s3 2890; GFX7-NEXT: s_waitcnt vmcnt(0) 2891; GFX7-NEXT: flat_store_dword v[2:3], v0 2892; GFX7-NEXT: s_endpgm 2893; 2894; GFX10-WGP-LABEL: global_system_one_as_monotonic_load: 2895; GFX10-WGP: ; %bb.0: ; %entry 2896; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2897; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2898; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2899; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 2900; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2901; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 2902; GFX10-WGP-NEXT: s_endpgm 2903; 2904; GFX10-CU-LABEL: global_system_one_as_monotonic_load: 2905; GFX10-CU: ; %bb.0: ; %entry 2906; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2907; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2908; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2909; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 2910; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2911; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 2912; GFX10-CU-NEXT: s_endpgm 2913; 2914; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_load: 2915; SKIP-CACHE-INV: ; %bb.0: ; %entry 2916; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2917; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 2918; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 2919; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2920; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 2921; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 2922; SKIP-CACHE-INV-NEXT: 
buffer_load_dword v0, off, s[0:3], 0 glc 2923; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 2924; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 2925; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 2926; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 2927; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 2928; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 2929; SKIP-CACHE-INV-NEXT: s_endpgm 2930 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 2931entry: 2932 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") monotonic, align 4 2933 store i32 %val, i32 addrspace(1)* %out 2934 ret void 2935} 2936 ; Test intent, per the IR at the end of this function: an atomic i32 load from %in (addrspace(1)) with syncscope("one-as") and acquire ordering, align 4; the loaded value is written to %out with a plain store. Expected ISA is pinned per check prefix (GFX6, GFX7, GFX10-WGP, GFX10-CU, SKIP-CACHE-INV); the SKIP-CACHE-INV expectations below contain no cache-invalidate instruction after the load. 2937define amdgpu_kernel void @global_system_one_as_acquire_load( 2938; GFX6-LABEL: global_system_one_as_acquire_load: 2939; GFX6: ; %bb.0: ; %entry 2940; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2941; GFX6-NEXT: s_mov_b32 s3, 0xf000 2942; GFX6-NEXT: s_mov_b32 s2, -1 2943; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2944; GFX6-NEXT: s_mov_b32 s0, s4 2945; GFX6-NEXT: s_mov_b32 s1, s5 2946; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 2947; GFX6-NEXT: s_waitcnt vmcnt(0) 2948; GFX6-NEXT: buffer_wbinvl1 2949; GFX6-NEXT: s_mov_b32 s4, s6 2950; GFX6-NEXT: s_mov_b32 s5, s7 2951; GFX6-NEXT: s_mov_b32 s6, s2 2952; GFX6-NEXT: s_mov_b32 s7, s3 2953; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 2954; GFX6-NEXT: s_endpgm 2955; 2956; GFX7-LABEL: global_system_one_as_acquire_load: 2957; GFX7: ; %bb.0: ; %entry 2958; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2959; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2960; GFX7-NEXT: v_mov_b32_e32 v0, s0 2961; GFX7-NEXT: v_mov_b32_e32 v1, s1 2962; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 2963; GFX7-NEXT: s_waitcnt vmcnt(0) 2964; GFX7-NEXT: buffer_wbinvl1_vol 2965; GFX7-NEXT: v_mov_b32_e32 v2, s2 2966; GFX7-NEXT: v_mov_b32_e32 v3, s3 2967; GFX7-NEXT: flat_store_dword v[2:3], v0 2968; GFX7-NEXT: s_endpgm 2969; 2970; GFX10-WGP-LABEL: global_system_one_as_acquire_load: 2971; GFX10-WGP: ; %bb.0: ; %entry 2972; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2973; 
GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 2974; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2975; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 2976; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 2977; GFX10-WGP-NEXT: buffer_gl0_inv 2978; GFX10-WGP-NEXT: buffer_gl1_inv 2979; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 2980; GFX10-WGP-NEXT: s_endpgm 2981; 2982; GFX10-CU-LABEL: global_system_one_as_acquire_load: 2983; GFX10-CU: ; %bb.0: ; %entry 2984; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2985; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 2986; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2987; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 2988; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 2989; GFX10-CU-NEXT: buffer_gl0_inv 2990; GFX10-CU-NEXT: buffer_gl1_inv 2991; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 2992; GFX10-CU-NEXT: s_endpgm 2993; 2994; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load: 2995; SKIP-CACHE-INV: ; %bb.0: ; %entry 2996; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2997; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 2998; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 2999; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3000; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3001; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3002; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3003; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3004; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3005; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3006; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3007; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3008; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3009; SKIP-CACHE-INV-NEXT: s_endpgm 3010 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3011entry: 3012 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") acquire, align 4 3013 store i32 %val, i32 addrspace(1)* %out 3014 ret void 3015} 3016 ; Test intent, per the IR at the end of this function: an atomic i32 load from %in (addrspace(1)) with syncscope("one-as") and seq_cst ordering, align 4; the loaded value is written to %out with a plain store. 3017define amdgpu_kernel void @global_system_one_as_seq_cst_load( 3018; GFX6-LABEL: global_system_one_as_seq_cst_load: 3019; GFX6: ; %bb.0: ; %entry 
3020; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3021; GFX6-NEXT: s_mov_b32 s3, 0xf000 3022; GFX6-NEXT: s_mov_b32 s2, -1 3023; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3024; GFX6-NEXT: s_mov_b32 s0, s4 3025; GFX6-NEXT: s_mov_b32 s1, s5 3026; GFX6-NEXT: s_waitcnt vmcnt(0) 3027; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3028; GFX6-NEXT: s_waitcnt vmcnt(0) 3029; GFX6-NEXT: buffer_wbinvl1 3030; GFX6-NEXT: s_mov_b32 s4, s6 3031; GFX6-NEXT: s_mov_b32 s5, s7 3032; GFX6-NEXT: s_mov_b32 s6, s2 3033; GFX6-NEXT: s_mov_b32 s7, s3 3034; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3035; GFX6-NEXT: s_endpgm 3036; 3037; GFX7-LABEL: global_system_one_as_seq_cst_load: 3038; GFX7: ; %bb.0: ; %entry 3039; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3040; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3041; GFX7-NEXT: v_mov_b32_e32 v0, s0 3042; GFX7-NEXT: v_mov_b32_e32 v1, s1 3043; GFX7-NEXT: s_waitcnt vmcnt(0) 3044; GFX7-NEXT: flat_load_dword v0, v[0:1] glc 3045; GFX7-NEXT: s_waitcnt vmcnt(0) 3046; GFX7-NEXT: buffer_wbinvl1_vol 3047; GFX7-NEXT: v_mov_b32_e32 v2, s2 3048; GFX7-NEXT: v_mov_b32_e32 v3, s3 3049; GFX7-NEXT: flat_store_dword v[2:3], v0 3050; GFX7-NEXT: s_endpgm 3051; 3052; GFX10-WGP-LABEL: global_system_one_as_seq_cst_load: 3053; GFX10-WGP: ; %bb.0: ; %entry 3054; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3055; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3056; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3057; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3058; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3059; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3060; GFX10-WGP-NEXT: buffer_gl0_inv 3061; GFX10-WGP-NEXT: buffer_gl1_inv 3062; GFX10-WGP-NEXT: global_store_dword v0, v1, s[2:3] 3063; GFX10-WGP-NEXT: s_endpgm 3064; 3065; GFX10-CU-LABEL: global_system_one_as_seq_cst_load: 3066; GFX10-CU: ; %bb.0: ; %entry 3067; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3068; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3069; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3070; GFX10-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 3071; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc 3072; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3073; GFX10-CU-NEXT: buffer_gl0_inv 3074; GFX10-CU-NEXT: buffer_gl1_inv 3075; GFX10-CU-NEXT: global_store_dword v0, v1, s[2:3] 3076; GFX10-CU-NEXT: s_endpgm 3077; 3078; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load: 3079; SKIP-CACHE-INV: ; %bb.0: ; %entry 3080; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3081; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3082; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3083; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3084; SKIP-CACHE-INV-NEXT: s_mov_b32 s0, s4 3085; SKIP-CACHE-INV-NEXT: s_mov_b32 s1, s5 3086; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3087; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc 3088; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3089; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s6 3090; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s7 3091; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, s2 3092; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, s3 3093; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3094; SKIP-CACHE-INV-NEXT: s_endpgm 3095 i32 addrspace(1)* %in, i32 addrspace(1)* %out) { 3096entry: 3097 %val = load atomic i32, i32 addrspace(1)* %in syncscope("one-as") seq_cst, align 4 3098 store i32 %val, i32 addrspace(1)* %out 3099 ret void 3100} 3101 ; Test intent, per the IR at the end of this function: an atomic store of the i32 kernel argument %in to %out (addrspace(1)) with syncscope("one-as") and unordered ordering, align 4. Expected ISA is pinned per check prefix (GFX6, GFX7, GFX10-WGP, GFX10-CU, SKIP-CACHE-INV). 3102define amdgpu_kernel void @global_system_one_as_unordered_store( 3103; GFX6-LABEL: global_system_one_as_unordered_store: 3104; GFX6: ; %bb.0: ; %entry 3105; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 3106; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3107; GFX6-NEXT: s_mov_b32 s3, 0xf000 3108; GFX6-NEXT: s_mov_b32 s2, -1 3109; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3110; GFX6-NEXT: v_mov_b32_e32 v0, s4 3111; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3112; GFX6-NEXT: s_endpgm 3113; 3114; GFX7-LABEL: global_system_one_as_unordered_store: 3115; GFX7: ; %bb.0: ; %entry 3116; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 3117; GFX7-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x2 3118; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3119; GFX7-NEXT: v_mov_b32_e32 v2, s2 3120; GFX7-NEXT: v_mov_b32_e32 v0, s0 3121; GFX7-NEXT: v_mov_b32_e32 v1, s1 3122; GFX7-NEXT: flat_store_dword v[0:1], v2 3123; GFX7-NEXT: s_endpgm 3124; 3125; GFX10-WGP-LABEL: global_system_one_as_unordered_store: 3126; GFX10-WGP: ; %bb.0: ; %entry 3127; GFX10-WGP-NEXT: s_clause 0x1 3128; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 3129; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3130; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3131; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3132; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3133; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3134; GFX10-WGP-NEXT: s_endpgm 3135; 3136; GFX10-CU-LABEL: global_system_one_as_unordered_store: 3137; GFX10-CU: ; %bb.0: ; %entry 3138; GFX10-CU-NEXT: s_clause 0x1 3139; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 3140; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3141; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3142; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3143; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3144; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3145; GFX10-CU-NEXT: s_endpgm 3146; 3147; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store: 3148; SKIP-CACHE-INV: ; %bb.0: ; %entry 3149; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 3150; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3151; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3152; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3153; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3154; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 3155; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 3156; SKIP-CACHE-INV-NEXT: s_endpgm 3157 i32 %in, i32 addrspace(1)* %out) { 3158entry: 3159 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") unordered, align 4 3160 ret void 3161} 3162 ; Test intent, per the IR at the end of this function: an atomic store of the i32 kernel argument %in to %out (addrspace(1)) with syncscope("one-as") and monotonic ordering, align 4. 3163define amdgpu_kernel void @global_system_one_as_monotonic_store( 3164; GFX6-LABEL: global_system_one_as_monotonic_store: 3165; GFX6: ; %bb.0: ; %entry 3166; 
GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 3167; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3168; GFX6-NEXT: s_mov_b32 s3, 0xf000 3169; GFX6-NEXT: s_mov_b32 s2, -1 3170; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3171; GFX6-NEXT: v_mov_b32_e32 v0, s4 3172; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3173; GFX6-NEXT: s_endpgm 3174; 3175; GFX7-LABEL: global_system_one_as_monotonic_store: 3176; GFX7: ; %bb.0: ; %entry 3177; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 3178; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 3179; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3180; GFX7-NEXT: v_mov_b32_e32 v2, s2 3181; GFX7-NEXT: v_mov_b32_e32 v0, s0 3182; GFX7-NEXT: v_mov_b32_e32 v1, s1 3183; GFX7-NEXT: flat_store_dword v[0:1], v2 3184; GFX7-NEXT: s_endpgm 3185; 3186; GFX10-WGP-LABEL: global_system_one_as_monotonic_store: 3187; GFX10-WGP: ; %bb.0: ; %entry 3188; GFX10-WGP-NEXT: s_clause 0x1 3189; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 3190; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3191; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3192; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3193; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3194; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3195; GFX10-WGP-NEXT: s_endpgm 3196; 3197; GFX10-CU-LABEL: global_system_one_as_monotonic_store: 3198; GFX10-CU: ; %bb.0: ; %entry 3199; GFX10-CU-NEXT: s_clause 0x1 3200; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 3201; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3202; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3203; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3204; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3205; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3206; GFX10-CU-NEXT: s_endpgm 3207; 3208; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store: 3209; SKIP-CACHE-INV: ; %bb.0: ; %entry 3210; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 3211; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3212; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3213; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3214; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) 3215; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 3216; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 3217; SKIP-CACHE-INV-NEXT: s_endpgm 3218 i32 %in, i32 addrspace(1)* %out) { 3219entry: 3220 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") monotonic, align 4 3221 ret void 3222} 3223 3224define amdgpu_kernel void @global_system_one_as_release_store( 3225; GFX6-LABEL: global_system_one_as_release_store: 3226; GFX6: ; %bb.0: ; %entry 3227; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 3228; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3229; GFX6-NEXT: s_mov_b32 s3, 0xf000 3230; GFX6-NEXT: s_mov_b32 s2, -1 3231; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3232; GFX6-NEXT: v_mov_b32_e32 v0, s4 3233; GFX6-NEXT: s_waitcnt vmcnt(0) 3234; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3235; GFX6-NEXT: s_endpgm 3236; 3237; GFX7-LABEL: global_system_one_as_release_store: 3238; GFX7: ; %bb.0: ; %entry 3239; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 3240; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 3241; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3242; GFX7-NEXT: v_mov_b32_e32 v2, s2 3243; GFX7-NEXT: v_mov_b32_e32 v0, s0 3244; GFX7-NEXT: v_mov_b32_e32 v1, s1 3245; GFX7-NEXT: s_waitcnt vmcnt(0) 3246; GFX7-NEXT: flat_store_dword v[0:1], v2 3247; GFX7-NEXT: s_endpgm 3248; 3249; GFX10-WGP-LABEL: global_system_one_as_release_store: 3250; GFX10-WGP: ; %bb.0: ; %entry 3251; GFX10-WGP-NEXT: s_clause 0x1 3252; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 3253; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3254; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3255; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3256; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3257; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3258; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3259; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3260; GFX10-WGP-NEXT: s_endpgm 3261; 3262; GFX10-CU-LABEL: global_system_one_as_release_store: 3263; GFX10-CU: ; %bb.0: ; %entry 3264; GFX10-CU-NEXT: s_clause 0x1 3265; GFX10-CU-NEXT: 
s_load_dword s2, s[4:5], 0x0 3266; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3267; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3268; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3269; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3270; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3271; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3272; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3273; GFX10-CU-NEXT: s_endpgm 3274; 3275; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store: 3276; SKIP-CACHE-INV: ; %bb.0: ; %entry 3277; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 3278; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3279; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3280; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3281; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3282; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 3283; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3284; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 3285; SKIP-CACHE-INV-NEXT: s_endpgm 3286 i32 %in, i32 addrspace(1)* %out) { 3287entry: 3288 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") release, align 4 3289 ret void 3290} 3291 3292define amdgpu_kernel void @global_system_one_as_seq_cst_store( 3293; GFX6-LABEL: global_system_one_as_seq_cst_store: 3294; GFX6: ; %bb.0: ; %entry 3295; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 3296; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3297; GFX6-NEXT: s_mov_b32 s3, 0xf000 3298; GFX6-NEXT: s_mov_b32 s2, -1 3299; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3300; GFX6-NEXT: v_mov_b32_e32 v0, s4 3301; GFX6-NEXT: s_waitcnt vmcnt(0) 3302; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 3303; GFX6-NEXT: s_endpgm 3304; 3305; GFX7-LABEL: global_system_one_as_seq_cst_store: 3306; GFX7: ; %bb.0: ; %entry 3307; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 3308; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 3309; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3310; GFX7-NEXT: v_mov_b32_e32 v2, s2 3311; GFX7-NEXT: v_mov_b32_e32 v0, s0 3312; GFX7-NEXT: v_mov_b32_e32 v1, s1 3313; GFX7-NEXT: s_waitcnt vmcnt(0) 
3314; GFX7-NEXT: flat_store_dword v[0:1], v2 3315; GFX7-NEXT: s_endpgm 3316; 3317; GFX10-WGP-LABEL: global_system_one_as_seq_cst_store: 3318; GFX10-WGP: ; %bb.0: ; %entry 3319; GFX10-WGP-NEXT: s_clause 0x1 3320; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x0 3321; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3322; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3323; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3324; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3325; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3326; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3327; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3328; GFX10-WGP-NEXT: s_endpgm 3329; 3330; GFX10-CU-LABEL: global_system_one_as_seq_cst_store: 3331; GFX10-CU: ; %bb.0: ; %entry 3332; GFX10-CU-NEXT: s_clause 0x1 3333; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x0 3334; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3335; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3336; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3337; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3338; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3339; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3340; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3341; GFX10-CU-NEXT: s_endpgm 3342; 3343; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store: 3344; SKIP-CACHE-INV: ; %bb.0: ; %entry 3345; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 3346; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3347; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 3348; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 3349; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3350; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 3351; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3352; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 3353; SKIP-CACHE-INV-NEXT: s_endpgm 3354 i32 %in, i32 addrspace(1)* %out) { 3355entry: 3356 store atomic i32 %in, i32 addrspace(1)* %out syncscope("one-as") seq_cst, align 4 3357 ret void 3358} 3359 3360define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( 3361; GFX6-LABEL: 
global_system_one_as_monotonic_atomicrmw: 3362; GFX6: ; %bb.0: ; %entry 3363; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3364; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3365; GFX6-NEXT: s_mov_b32 s7, 0xf000 3366; GFX6-NEXT: s_mov_b32 s6, -1 3367; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3368; GFX6-NEXT: v_mov_b32_e32 v0, s0 3369; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3370; GFX6-NEXT: s_endpgm 3371; 3372; GFX7-LABEL: global_system_one_as_monotonic_atomicrmw: 3373; GFX7: ; %bb.0: ; %entry 3374; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3375; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3376; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3377; GFX7-NEXT: v_mov_b32_e32 v0, s0 3378; GFX7-NEXT: v_mov_b32_e32 v1, s1 3379; GFX7-NEXT: v_mov_b32_e32 v2, s2 3380; GFX7-NEXT: flat_atomic_swap v[0:1], v2 3381; GFX7-NEXT: s_endpgm 3382; 3383; GFX10-WGP-LABEL: global_system_one_as_monotonic_atomicrmw: 3384; GFX10-WGP: ; %bb.0: ; %entry 3385; GFX10-WGP-NEXT: s_clause 0x1 3386; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3387; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3388; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3389; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3390; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3391; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 3392; GFX10-WGP-NEXT: s_endpgm 3393; 3394; GFX10-CU-LABEL: global_system_one_as_monotonic_atomicrmw: 3395; GFX10-CU: ; %bb.0: ; %entry 3396; GFX10-CU-NEXT: s_clause 0x1 3397; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3398; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3399; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3400; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3401; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3402; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 3403; GFX10-CU-NEXT: s_endpgm 3404; 3405; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw: 3406; SKIP-CACHE-INV: ; %bb.0: ; %entry 3407; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3408; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3409; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s7, 0xf000 3410; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3411; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3412; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3413; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3414; SKIP-CACHE-INV-NEXT: s_endpgm 3415 i32 addrspace(1)* %out, i32 %in) { 3416entry: 3417 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") monotonic 3418 ret void 3419} 3420 3421define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( 3422; GFX6-LABEL: global_system_one_as_acquire_atomicrmw: 3423; GFX6: ; %bb.0: ; %entry 3424; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3425; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3426; GFX6-NEXT: s_mov_b32 s7, 0xf000 3427; GFX6-NEXT: s_mov_b32 s6, -1 3428; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3429; GFX6-NEXT: v_mov_b32_e32 v0, s0 3430; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3431; GFX6-NEXT: s_waitcnt vmcnt(0) 3432; GFX6-NEXT: buffer_wbinvl1 3433; GFX6-NEXT: s_endpgm 3434; 3435; GFX7-LABEL: global_system_one_as_acquire_atomicrmw: 3436; GFX7: ; %bb.0: ; %entry 3437; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3438; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3439; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3440; GFX7-NEXT: v_mov_b32_e32 v0, s0 3441; GFX7-NEXT: v_mov_b32_e32 v1, s1 3442; GFX7-NEXT: v_mov_b32_e32 v2, s2 3443; GFX7-NEXT: flat_atomic_swap v[0:1], v2 3444; GFX7-NEXT: s_waitcnt vmcnt(0) 3445; GFX7-NEXT: buffer_wbinvl1_vol 3446; GFX7-NEXT: s_endpgm 3447; 3448; GFX10-WGP-LABEL: global_system_one_as_acquire_atomicrmw: 3449; GFX10-WGP: ; %bb.0: ; %entry 3450; GFX10-WGP-NEXT: s_clause 0x1 3451; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3452; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3453; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3454; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3455; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3456; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 3457; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3458; GFX10-WGP-NEXT: buffer_gl0_inv 3459; 
GFX10-WGP-NEXT: buffer_gl1_inv 3460; GFX10-WGP-NEXT: s_endpgm 3461; 3462; GFX10-CU-LABEL: global_system_one_as_acquire_atomicrmw: 3463; GFX10-CU: ; %bb.0: ; %entry 3464; GFX10-CU-NEXT: s_clause 0x1 3465; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3466; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3467; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3468; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3469; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3470; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 3471; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3472; GFX10-CU-NEXT: buffer_gl0_inv 3473; GFX10-CU-NEXT: buffer_gl1_inv 3474; GFX10-CU-NEXT: s_endpgm 3475; 3476; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw: 3477; SKIP-CACHE-INV: ; %bb.0: ; %entry 3478; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3479; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3480; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3481; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3482; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3483; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3484; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3485; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3486; SKIP-CACHE-INV-NEXT: s_endpgm 3487 i32 addrspace(1)* %out, i32 %in) { 3488entry: 3489 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acquire 3490 ret void 3491} 3492 3493define amdgpu_kernel void @global_system_one_as_release_atomicrmw( 3494; GFX6-LABEL: global_system_one_as_release_atomicrmw: 3495; GFX6: ; %bb.0: ; %entry 3496; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3497; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3498; GFX6-NEXT: s_mov_b32 s7, 0xf000 3499; GFX6-NEXT: s_mov_b32 s6, -1 3500; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3501; GFX6-NEXT: v_mov_b32_e32 v0, s0 3502; GFX6-NEXT: s_waitcnt vmcnt(0) 3503; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3504; GFX6-NEXT: s_endpgm 3505; 3506; GFX7-LABEL: global_system_one_as_release_atomicrmw: 3507; GFX7: ; %bb.0: ; %entry 3508; 
GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3509; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3510; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3511; GFX7-NEXT: v_mov_b32_e32 v0, s0 3512; GFX7-NEXT: v_mov_b32_e32 v1, s1 3513; GFX7-NEXT: v_mov_b32_e32 v2, s2 3514; GFX7-NEXT: s_waitcnt vmcnt(0) 3515; GFX7-NEXT: flat_atomic_swap v[0:1], v2 3516; GFX7-NEXT: s_endpgm 3517; 3518; GFX10-WGP-LABEL: global_system_one_as_release_atomicrmw: 3519; GFX10-WGP: ; %bb.0: ; %entry 3520; GFX10-WGP-NEXT: s_clause 0x1 3521; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3522; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3523; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3524; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3525; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3526; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3527; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3528; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 3529; GFX10-WGP-NEXT: s_endpgm 3530; 3531; GFX10-CU-LABEL: global_system_one_as_release_atomicrmw: 3532; GFX10-CU: ; %bb.0: ; %entry 3533; GFX10-CU-NEXT: s_clause 0x1 3534; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3535; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3536; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3537; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3538; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3539; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3540; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3541; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 3542; GFX10-CU-NEXT: s_endpgm 3543; 3544; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw: 3545; SKIP-CACHE-INV: ; %bb.0: ; %entry 3546; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3547; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3548; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3549; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3550; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3551; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3552; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3553; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3554; SKIP-CACHE-INV-NEXT: 
s_endpgm 3555 i32 addrspace(1)* %out, i32 %in) { 3556entry: 3557 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") release 3558 ret void 3559} 3560 3561define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( 3562; GFX6-LABEL: global_system_one_as_acq_rel_atomicrmw: 3563; GFX6: ; %bb.0: ; %entry 3564; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3565; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3566; GFX6-NEXT: s_mov_b32 s7, 0xf000 3567; GFX6-NEXT: s_mov_b32 s6, -1 3568; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3569; GFX6-NEXT: v_mov_b32_e32 v0, s0 3570; GFX6-NEXT: s_waitcnt vmcnt(0) 3571; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3572; GFX6-NEXT: s_waitcnt vmcnt(0) 3573; GFX6-NEXT: buffer_wbinvl1 3574; GFX6-NEXT: s_endpgm 3575; 3576; GFX7-LABEL: global_system_one_as_acq_rel_atomicrmw: 3577; GFX7: ; %bb.0: ; %entry 3578; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3579; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3580; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3581; GFX7-NEXT: v_mov_b32_e32 v0, s0 3582; GFX7-NEXT: v_mov_b32_e32 v1, s1 3583; GFX7-NEXT: v_mov_b32_e32 v2, s2 3584; GFX7-NEXT: s_waitcnt vmcnt(0) 3585; GFX7-NEXT: flat_atomic_swap v[0:1], v2 3586; GFX7-NEXT: s_waitcnt vmcnt(0) 3587; GFX7-NEXT: buffer_wbinvl1_vol 3588; GFX7-NEXT: s_endpgm 3589; 3590; GFX10-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw: 3591; GFX10-WGP: ; %bb.0: ; %entry 3592; GFX10-WGP-NEXT: s_clause 0x1 3593; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3594; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3595; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3596; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3597; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3598; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3599; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3600; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 3601; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3602; GFX10-WGP-NEXT: buffer_gl0_inv 3603; GFX10-WGP-NEXT: buffer_gl1_inv 3604; GFX10-WGP-NEXT: s_endpgm 3605; 3606; GFX10-CU-LABEL: 
global_system_one_as_acq_rel_atomicrmw: 3607; GFX10-CU: ; %bb.0: ; %entry 3608; GFX10-CU-NEXT: s_clause 0x1 3609; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3610; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3611; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3612; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3613; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3614; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3615; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3616; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 3617; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3618; GFX10-CU-NEXT: buffer_gl0_inv 3619; GFX10-CU-NEXT: buffer_gl1_inv 3620; GFX10-CU-NEXT: s_endpgm 3621; 3622; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw: 3623; SKIP-CACHE-INV: ; %bb.0: ; %entry 3624; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3625; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3626; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3627; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3628; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3629; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3630; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3631; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3632; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3633; SKIP-CACHE-INV-NEXT: s_endpgm 3634 i32 addrspace(1)* %out, i32 %in) { 3635entry: 3636 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acq_rel 3637 ret void 3638} 3639 3640define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( 3641; GFX6-LABEL: global_system_one_as_seq_cst_atomicrmw: 3642; GFX6: ; %bb.0: ; %entry 3643; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3644; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3645; GFX6-NEXT: s_mov_b32 s7, 0xf000 3646; GFX6-NEXT: s_mov_b32 s6, -1 3647; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3648; GFX6-NEXT: v_mov_b32_e32 v0, s0 3649; GFX6-NEXT: s_waitcnt vmcnt(0) 3650; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3651; GFX6-NEXT: s_waitcnt vmcnt(0) 3652; GFX6-NEXT: buffer_wbinvl1 3653; GFX6-NEXT: 
s_endpgm 3654; 3655; GFX7-LABEL: global_system_one_as_seq_cst_atomicrmw: 3656; GFX7: ; %bb.0: ; %entry 3657; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3658; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3659; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3660; GFX7-NEXT: v_mov_b32_e32 v0, s0 3661; GFX7-NEXT: v_mov_b32_e32 v1, s1 3662; GFX7-NEXT: v_mov_b32_e32 v2, s2 3663; GFX7-NEXT: s_waitcnt vmcnt(0) 3664; GFX7-NEXT: flat_atomic_swap v[0:1], v2 3665; GFX7-NEXT: s_waitcnt vmcnt(0) 3666; GFX7-NEXT: buffer_wbinvl1_vol 3667; GFX7-NEXT: s_endpgm 3668; 3669; GFX10-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw: 3670; GFX10-WGP: ; %bb.0: ; %entry 3671; GFX10-WGP-NEXT: s_clause 0x1 3672; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3673; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3674; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3675; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3676; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3677; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3678; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3679; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] 3680; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3681; GFX10-WGP-NEXT: buffer_gl0_inv 3682; GFX10-WGP-NEXT: buffer_gl1_inv 3683; GFX10-WGP-NEXT: s_endpgm 3684; 3685; GFX10-CU-LABEL: global_system_one_as_seq_cst_atomicrmw: 3686; GFX10-CU: ; %bb.0: ; %entry 3687; GFX10-CU-NEXT: s_clause 0x1 3688; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3689; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3690; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3691; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3692; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3693; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3694; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3695; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] 3696; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3697; GFX10-CU-NEXT: buffer_gl0_inv 3698; GFX10-CU-NEXT: buffer_gl1_inv 3699; GFX10-CU-NEXT: s_endpgm 3700; 3701; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw: 3702; SKIP-CACHE-INV: ; %bb.0: ; %entry 3703; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3704; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3705; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3706; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3707; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3708; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3709; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3710; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 3711; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3712; SKIP-CACHE-INV-NEXT: s_endpgm 3713 i32 addrspace(1)* %out, i32 %in) { 3714entry: 3715 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") seq_cst 3716 ret void 3717} 3718 3719define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( 3720; GFX6-LABEL: global_system_one_as_acquire_ret_atomicrmw: 3721; GFX6: ; %bb.0: ; %entry 3722; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3723; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3724; GFX6-NEXT: s_mov_b32 s7, 0xf000 3725; GFX6-NEXT: s_mov_b32 s6, -1 3726; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3727; GFX6-NEXT: v_mov_b32_e32 v0, s0 3728; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 3729; GFX6-NEXT: s_waitcnt vmcnt(0) 3730; GFX6-NEXT: buffer_wbinvl1 3731; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3732; GFX6-NEXT: s_endpgm 3733; 3734; GFX7-LABEL: global_system_one_as_acquire_ret_atomicrmw: 3735; GFX7: ; %bb.0: ; %entry 3736; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3737; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3738; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3739; GFX7-NEXT: v_mov_b32_e32 v0, s0 3740; GFX7-NEXT: v_mov_b32_e32 v1, s1 3741; GFX7-NEXT: v_mov_b32_e32 v2, s2 3742; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 3743; GFX7-NEXT: s_waitcnt vmcnt(0) 3744; GFX7-NEXT: buffer_wbinvl1_vol 3745; GFX7-NEXT: flat_store_dword v[0:1], v2 3746; GFX7-NEXT: s_endpgm 3747; 3748; GFX10-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw: 3749; GFX10-WGP: ; %bb.0: ; %entry 3750; GFX10-WGP-NEXT: s_clause 0x1 3751; GFX10-WGP-NEXT: 
s_load_dword s2, s[4:5], 0x8 3752; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3753; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3754; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3755; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3756; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 3757; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3758; GFX10-WGP-NEXT: buffer_gl0_inv 3759; GFX10-WGP-NEXT: buffer_gl1_inv 3760; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3761; GFX10-WGP-NEXT: s_endpgm 3762; 3763; GFX10-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw: 3764; GFX10-CU: ; %bb.0: ; %entry 3765; GFX10-CU-NEXT: s_clause 0x1 3766; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3767; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3768; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3769; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3770; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3771; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 3772; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3773; GFX10-CU-NEXT: buffer_gl0_inv 3774; GFX10-CU-NEXT: buffer_gl1_inv 3775; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3776; GFX10-CU-NEXT: s_endpgm 3777; 3778; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw: 3779; SKIP-CACHE-INV: ; %bb.0: ; %entry 3780; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3781; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3782; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3783; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3784; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3785; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3786; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 3787; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3788; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3789; SKIP-CACHE-INV-NEXT: s_endpgm 3790 i32 addrspace(1)* %out, i32 %in) { 3791entry: 3792 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acquire 3793 store i32 %val, i32 addrspace(1)* %out, align 4 3794 ret void 3795} 3796 3797define 
amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( 3798; GFX6-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 3799; GFX6: ; %bb.0: ; %entry 3800; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3801; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3802; GFX6-NEXT: s_mov_b32 s7, 0xf000 3803; GFX6-NEXT: s_mov_b32 s6, -1 3804; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3805; GFX6-NEXT: v_mov_b32_e32 v0, s0 3806; GFX6-NEXT: s_waitcnt vmcnt(0) 3807; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 3808; GFX6-NEXT: s_waitcnt vmcnt(0) 3809; GFX6-NEXT: buffer_wbinvl1 3810; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3811; GFX6-NEXT: s_endpgm 3812; 3813; GFX7-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 3814; GFX7: ; %bb.0: ; %entry 3815; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3816; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3817; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3818; GFX7-NEXT: v_mov_b32_e32 v0, s0 3819; GFX7-NEXT: v_mov_b32_e32 v1, s1 3820; GFX7-NEXT: v_mov_b32_e32 v2, s2 3821; GFX7-NEXT: s_waitcnt vmcnt(0) 3822; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 3823; GFX7-NEXT: s_waitcnt vmcnt(0) 3824; GFX7-NEXT: buffer_wbinvl1_vol 3825; GFX7-NEXT: flat_store_dword v[0:1], v2 3826; GFX7-NEXT: s_endpgm 3827; 3828; GFX10-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 3829; GFX10-WGP: ; %bb.0: ; %entry 3830; GFX10-WGP-NEXT: s_clause 0x1 3831; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3832; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3833; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3834; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3835; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3836; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3837; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3838; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 3839; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3840; GFX10-WGP-NEXT: buffer_gl0_inv 3841; GFX10-WGP-NEXT: buffer_gl1_inv 3842; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3843; GFX10-WGP-NEXT: s_endpgm 3844; 3845; GFX10-CU-LABEL: 
global_system_one_as_acq_rel_ret_atomicrmw: 3846; GFX10-CU: ; %bb.0: ; %entry 3847; GFX10-CU-NEXT: s_clause 0x1 3848; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3849; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3850; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3851; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3852; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3853; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3854; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3855; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 3856; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3857; GFX10-CU-NEXT: buffer_gl0_inv 3858; GFX10-CU-NEXT: buffer_gl1_inv 3859; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3860; GFX10-CU-NEXT: s_endpgm 3861; 3862; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: 3863; SKIP-CACHE-INV: ; %bb.0: ; %entry 3864; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3865; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3866; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3867; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3868; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3869; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3870; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3871; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 3872; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3873; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3874; SKIP-CACHE-INV-NEXT: s_endpgm 3875 i32 addrspace(1)* %out, i32 %in) { 3876entry: 3877 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") acq_rel 3878 store i32 %val, i32 addrspace(1)* %out, align 4 3879 ret void 3880} 3881 3882define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( 3883; GFX6-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 3884; GFX6: ; %bb.0: ; %entry 3885; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3886; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb 3887; GFX6-NEXT: s_mov_b32 s7, 0xf000 3888; GFX6-NEXT: s_mov_b32 s6, -1 3889; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3890; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 3891; GFX6-NEXT: s_waitcnt vmcnt(0) 3892; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 3893; GFX6-NEXT: s_waitcnt vmcnt(0) 3894; GFX6-NEXT: buffer_wbinvl1 3895; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 3896; GFX6-NEXT: s_endpgm 3897; 3898; GFX7-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 3899; GFX7: ; %bb.0: ; %entry 3900; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3901; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2 3902; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3903; GFX7-NEXT: v_mov_b32_e32 v0, s0 3904; GFX7-NEXT: v_mov_b32_e32 v1, s1 3905; GFX7-NEXT: v_mov_b32_e32 v2, s2 3906; GFX7-NEXT: s_waitcnt vmcnt(0) 3907; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc 3908; GFX7-NEXT: s_waitcnt vmcnt(0) 3909; GFX7-NEXT: buffer_wbinvl1_vol 3910; GFX7-NEXT: flat_store_dword v[0:1], v2 3911; GFX7-NEXT: s_endpgm 3912; 3913; GFX10-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 3914; GFX10-WGP: ; %bb.0: ; %entry 3915; GFX10-WGP-NEXT: s_clause 0x1 3916; GFX10-WGP-NEXT: s_load_dword s2, s[4:5], 0x8 3917; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3918; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 3919; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3920; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3921; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3922; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 3923; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 3924; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 3925; GFX10-WGP-NEXT: buffer_gl0_inv 3926; GFX10-WGP-NEXT: buffer_gl1_inv 3927; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] 3928; GFX10-WGP-NEXT: s_endpgm 3929; 3930; GFX10-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 3931; GFX10-CU: ; %bb.0: ; %entry 3932; GFX10-CU-NEXT: s_clause 0x1 3933; GFX10-CU-NEXT: s_load_dword s2, s[4:5], 0x8 3934; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3935; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 3936; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3937; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3938; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3939; 
GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 3940; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc 3941; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 3942; GFX10-CU-NEXT: buffer_gl0_inv 3943; GFX10-CU-NEXT: buffer_gl1_inv 3944; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] 3945; GFX10-CU-NEXT: s_endpgm 3946; 3947; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: 3948; SKIP-CACHE-INV: ; %bb.0: ; %entry 3949; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3950; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3951; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 3952; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 3953; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3954; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3955; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3956; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc 3957; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 3958; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 3959; SKIP-CACHE-INV-NEXT: s_endpgm 3960 i32 addrspace(1)* %out, i32 %in) { 3961entry: 3962 %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in syncscope("one-as") seq_cst 3963 store i32 %val, i32 addrspace(1)* %out, align 4 3964 ret void 3965} 3966 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), monotonic/monotonic: the checks expect no vmcnt/vscnt wait and no cache invalidate around the cmpswap. 3967define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( 3968; GFX6-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 3969; GFX6: ; %bb.0: ; %entry 3970; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 3971; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 3972; GFX6-NEXT: s_mov_b32 s7, 0xf000 3973; GFX6-NEXT: s_mov_b32 s6, -1 3974; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3975; GFX6-NEXT: v_mov_b32_e32 v0, s0 3976; GFX6-NEXT: v_mov_b32_e32 v1, s1 3977; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 3978; GFX6-NEXT: s_endpgm 3979; 3980; GFX7-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 3981; GFX7: ; %bb.0: ; %entry 3982; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3983; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 3984; 
GFX7-NEXT: s_waitcnt lgkmcnt(0) 3985; GFX7-NEXT: s_add_u32 s0, s0, 16 3986; GFX7-NEXT: s_addc_u32 s1, s1, 0 3987; GFX7-NEXT: v_mov_b32_e32 v0, s0 3988; GFX7-NEXT: v_mov_b32_e32 v2, s2 3989; GFX7-NEXT: v_mov_b32_e32 v1, s1 3990; GFX7-NEXT: v_mov_b32_e32 v3, s3 3991; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 3992; GFX7-NEXT: s_endpgm 3993; 3994; GFX10-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 3995; GFX10-WGP: ; %bb.0: ; %entry 3996; GFX10-WGP-NEXT: s_clause 0x1 3997; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 3998; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 3999; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4000; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4001; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4002; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4003; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4004; GFX10-WGP-NEXT: s_endpgm 4005; 4006; GFX10-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 4007; GFX10-CU: ; %bb.0: ; %entry 4008; GFX10-CU-NEXT: s_clause 0x1 4009; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4010; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4011; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4012; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4013; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4014; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4015; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4016; GFX10-CU-NEXT: s_endpgm 4017; 4018; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: 4019; SKIP-CACHE-INV: ; %bb.0: ; %entry 4020; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4021; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4022; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4023; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4024; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4025; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4026; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4027; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4028; SKIP-CACHE-INV-NEXT: 
s_endpgm 4029 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4030entry: 4031 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4032 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic 4033 ret void 4034} 4035 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), acquire/monotonic: the checks expect a wait plus cache invalidate after the cmpswap; SKIP-CACHE-INV keeps only the wait. 4036define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( 4037; GFX6-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 4038; GFX6: ; %bb.0: ; %entry 4039; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4040; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4041; GFX6-NEXT: s_mov_b32 s7, 0xf000 4042; GFX6-NEXT: s_mov_b32 s6, -1 4043; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4044; GFX6-NEXT: v_mov_b32_e32 v0, s0 4045; GFX6-NEXT: v_mov_b32_e32 v1, s1 4046; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4047; GFX6-NEXT: s_waitcnt vmcnt(0) 4048; GFX6-NEXT: buffer_wbinvl1 4049; GFX6-NEXT: s_endpgm 4050; 4051; GFX7-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 4052; GFX7: ; %bb.0: ; %entry 4053; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4054; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4055; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4056; GFX7-NEXT: s_add_u32 s0, s0, 16 4057; GFX7-NEXT: s_addc_u32 s1, s1, 0 4058; GFX7-NEXT: v_mov_b32_e32 v0, s0 4059; GFX7-NEXT: v_mov_b32_e32 v2, s2 4060; GFX7-NEXT: v_mov_b32_e32 v1, s1 4061; GFX7-NEXT: v_mov_b32_e32 v3, s3 4062; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4063; GFX7-NEXT: s_waitcnt vmcnt(0) 4064; GFX7-NEXT: buffer_wbinvl1_vol 4065; GFX7-NEXT: s_endpgm 4066; 4067; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 4068; GFX10-WGP: ; %bb.0: ; %entry 4069; GFX10-WGP-NEXT: s_clause 0x1 4070; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4071; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4072; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4073; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4074; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4075; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4076; GFX10-WGP-NEXT: global_atomic_cmpswap v2, 
v[0:1], s[2:3] offset:16 4077; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4078; GFX10-WGP-NEXT: buffer_gl0_inv 4079; GFX10-WGP-NEXT: buffer_gl1_inv 4080; GFX10-WGP-NEXT: s_endpgm 4081; 4082; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 4083; GFX10-CU: ; %bb.0: ; %entry 4084; GFX10-CU-NEXT: s_clause 0x1 4085; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4086; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4087; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4088; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4089; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4090; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4091; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4092; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4093; GFX10-CU-NEXT: buffer_gl0_inv 4094; GFX10-CU-NEXT: buffer_gl1_inv 4095; GFX10-CU-NEXT: s_endpgm 4096; 4097; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: 4098; SKIP-CACHE-INV: ; %bb.0: ; %entry 4099; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4100; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4101; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4102; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4103; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4104; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4105; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4106; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4107; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4108; SKIP-CACHE-INV-NEXT: s_endpgm 4109 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4110entry: 4111 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4112 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 4113 ret void 4114} 4115 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), release/monotonic: the checks expect a wait before the cmpswap and nothing after it. 4116define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( 4117; GFX6-LABEL: global_system_one_as_release_monotonic_cmpxchg: 4118; GFX6: ; %bb.0: ; %entry 4119; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4120; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0xb 4121; GFX6-NEXT: s_mov_b32 s7, 0xf000 4122; GFX6-NEXT: s_mov_b32 s6, -1 4123; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4124; GFX6-NEXT: v_mov_b32_e32 v0, s0 4125; GFX6-NEXT: v_mov_b32_e32 v1, s1 4126; GFX6-NEXT: s_waitcnt vmcnt(0) 4127; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4128; GFX6-NEXT: s_endpgm 4129; 4130; GFX7-LABEL: global_system_one_as_release_monotonic_cmpxchg: 4131; GFX7: ; %bb.0: ; %entry 4132; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4133; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4134; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4135; GFX7-NEXT: s_add_u32 s0, s0, 16 4136; GFX7-NEXT: s_addc_u32 s1, s1, 0 4137; GFX7-NEXT: v_mov_b32_e32 v0, s0 4138; GFX7-NEXT: v_mov_b32_e32 v2, s2 4139; GFX7-NEXT: v_mov_b32_e32 v1, s1 4140; GFX7-NEXT: v_mov_b32_e32 v3, s3 4141; GFX7-NEXT: s_waitcnt vmcnt(0) 4142; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4143; GFX7-NEXT: s_endpgm 4144; 4145; GFX10-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg: 4146; GFX10-WGP: ; %bb.0: ; %entry 4147; GFX10-WGP-NEXT: s_clause 0x1 4148; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4149; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4150; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4151; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4152; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4153; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4154; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4155; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4156; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4157; GFX10-WGP-NEXT: s_endpgm 4158; 4159; GFX10-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg: 4160; GFX10-CU: ; %bb.0: ; %entry 4161; GFX10-CU-NEXT: s_clause 0x1 4162; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4163; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4164; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4165; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4166; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4167; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4168; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 
4169; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4170; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4171; GFX10-CU-NEXT: s_endpgm 4172; 4173; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg: 4174; SKIP-CACHE-INV: ; %bb.0: ; %entry 4175; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4176; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4177; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4178; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4179; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4180; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4181; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4182; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4183; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4184; SKIP-CACHE-INV-NEXT: s_endpgm 4185 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4186entry: 4187 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4188 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic 4189 ret void 4190} 4191 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), acq_rel/monotonic: the checks expect a wait before the cmpswap and a wait plus cache invalidate after it; SKIP-CACHE-INV keeps only the waits. 4192define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( 4193; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 4194; GFX6: ; %bb.0: ; %entry 4195; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4196; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4197; GFX6-NEXT: s_mov_b32 s7, 0xf000 4198; GFX6-NEXT: s_mov_b32 s6, -1 4199; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4200; GFX6-NEXT: v_mov_b32_e32 v0, s0 4201; GFX6-NEXT: v_mov_b32_e32 v1, s1 4202; GFX6-NEXT: s_waitcnt vmcnt(0) 4203; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4204; GFX6-NEXT: s_waitcnt vmcnt(0) 4205; GFX6-NEXT: buffer_wbinvl1 4206; GFX6-NEXT: s_endpgm 4207; 4208; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 4209; GFX7: ; %bb.0: ; %entry 4210; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4211; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4212; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4213; GFX7-NEXT: s_add_u32 s0, s0, 16 
4214; GFX7-NEXT: s_addc_u32 s1, s1, 0 4215; GFX7-NEXT: v_mov_b32_e32 v0, s0 4216; GFX7-NEXT: v_mov_b32_e32 v2, s2 4217; GFX7-NEXT: v_mov_b32_e32 v1, s1 4218; GFX7-NEXT: v_mov_b32_e32 v3, s3 4219; GFX7-NEXT: s_waitcnt vmcnt(0) 4220; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4221; GFX7-NEXT: s_waitcnt vmcnt(0) 4222; GFX7-NEXT: buffer_wbinvl1_vol 4223; GFX7-NEXT: s_endpgm 4224; 4225; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 4226; GFX10-WGP: ; %bb.0: ; %entry 4227; GFX10-WGP-NEXT: s_clause 0x1 4228; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4229; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4230; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4231; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4232; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4233; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4234; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4235; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4236; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4237; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4238; GFX10-WGP-NEXT: buffer_gl0_inv 4239; GFX10-WGP-NEXT: buffer_gl1_inv 4240; GFX10-WGP-NEXT: s_endpgm 4241; 4242; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 4243; GFX10-CU: ; %bb.0: ; %entry 4244; GFX10-CU-NEXT: s_clause 0x1 4245; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4246; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4247; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4248; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4249; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4250; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4251; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4252; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4253; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4254; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4255; GFX10-CU-NEXT: buffer_gl0_inv 4256; GFX10-CU-NEXT: buffer_gl1_inv 4257; GFX10-CU-NEXT: s_endpgm 4258; 4259; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: 4260; SKIP-CACHE-INV: ; %bb.0: ; %entry 4261; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4262; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4263; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4264; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4265; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4266; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4267; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4268; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4269; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4270; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4271; SKIP-CACHE-INV-NEXT: s_endpgm 4272 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4273entry: 4274 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4275 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 4276 ret void 4277} 4278 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), seq_cst/monotonic: the checks expect a wait before the cmpswap and a wait plus cache invalidate after it; SKIP-CACHE-INV keeps only the waits. 4279define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( 4280; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 4281; GFX6: ; %bb.0: ; %entry 4282; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4283; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4284; GFX6-NEXT: s_mov_b32 s7, 0xf000 4285; GFX6-NEXT: s_mov_b32 s6, -1 4286; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4287; GFX6-NEXT: v_mov_b32_e32 v0, s0 4288; GFX6-NEXT: v_mov_b32_e32 v1, s1 4289; GFX6-NEXT: s_waitcnt vmcnt(0) 4290; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4291; GFX6-NEXT: s_waitcnt vmcnt(0) 4292; GFX6-NEXT: buffer_wbinvl1 4293; GFX6-NEXT: s_endpgm 4294; 4295; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 4296; GFX7: ; %bb.0: ; %entry 4297; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4298; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4299; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4300; GFX7-NEXT: s_add_u32 s0, s0, 16 4301; GFX7-NEXT: s_addc_u32 s1, s1, 0 4302; GFX7-NEXT: v_mov_b32_e32 v0, s0 4303; GFX7-NEXT: v_mov_b32_e32 v2, s2 4304; GFX7-NEXT: v_mov_b32_e32 v1, s1 4305; GFX7-NEXT: v_mov_b32_e32 v3, s3 4306; GFX7-NEXT: s_waitcnt vmcnt(0) 
4307; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4308; GFX7-NEXT: s_waitcnt vmcnt(0) 4309; GFX7-NEXT: buffer_wbinvl1_vol 4310; GFX7-NEXT: s_endpgm 4311; 4312; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 4313; GFX10-WGP: ; %bb.0: ; %entry 4314; GFX10-WGP-NEXT: s_clause 0x1 4315; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4316; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4317; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4318; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4319; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4320; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4321; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4322; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4323; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4324; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4325; GFX10-WGP-NEXT: buffer_gl0_inv 4326; GFX10-WGP-NEXT: buffer_gl1_inv 4327; GFX10-WGP-NEXT: s_endpgm 4328; 4329; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 4330; GFX10-CU: ; %bb.0: ; %entry 4331; GFX10-CU-NEXT: s_clause 0x1 4332; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4333; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4334; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4335; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4336; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4337; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4338; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4339; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4340; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4341; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4342; GFX10-CU-NEXT: buffer_gl0_inv 4343; GFX10-CU-NEXT: buffer_gl1_inv 4344; GFX10-CU-NEXT: s_endpgm 4345; 4346; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: 4347; SKIP-CACHE-INV: ; %bb.0: ; %entry 4348; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4349; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4350; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4351; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4352; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) 4353; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4354; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4355; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4356; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4357; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4358; SKIP-CACHE-INV-NEXT: s_endpgm 4359 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4360entry: 4361 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4362 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 4363 ret void 4364} 4365 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), acquire/acquire: the checks expect a wait plus cache invalidate after the cmpswap; SKIP-CACHE-INV keeps only the wait. 4366define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( 4367; GFX6-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 4368; GFX6: ; %bb.0: ; %entry 4369; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4370; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4371; GFX6-NEXT: s_mov_b32 s7, 0xf000 4372; GFX6-NEXT: s_mov_b32 s6, -1 4373; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4374; GFX6-NEXT: v_mov_b32_e32 v0, s0 4375; GFX6-NEXT: v_mov_b32_e32 v1, s1 4376; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4377; GFX6-NEXT: s_waitcnt vmcnt(0) 4378; GFX6-NEXT: buffer_wbinvl1 4379; GFX6-NEXT: s_endpgm 4380; 4381; GFX7-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 4382; GFX7: ; %bb.0: ; %entry 4383; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4384; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4385; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4386; GFX7-NEXT: s_add_u32 s0, s0, 16 4387; GFX7-NEXT: s_addc_u32 s1, s1, 0 4388; GFX7-NEXT: v_mov_b32_e32 v0, s0 4389; GFX7-NEXT: v_mov_b32_e32 v2, s2 4390; GFX7-NEXT: v_mov_b32_e32 v1, s1 4391; GFX7-NEXT: v_mov_b32_e32 v3, s3 4392; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4393; GFX7-NEXT: s_waitcnt vmcnt(0) 4394; GFX7-NEXT: buffer_wbinvl1_vol 4395; GFX7-NEXT: s_endpgm 4396; 4397; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 4398; GFX10-WGP: ; %bb.0: ; %entry 4399; GFX10-WGP-NEXT: s_clause 0x1 4400; GFX10-WGP-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x8 4401; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4402; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4403; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4404; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4405; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4406; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4407; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4408; GFX10-WGP-NEXT: buffer_gl0_inv 4409; GFX10-WGP-NEXT: buffer_gl1_inv 4410; GFX10-WGP-NEXT: s_endpgm 4411; 4412; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 4413; GFX10-CU: ; %bb.0: ; %entry 4414; GFX10-CU-NEXT: s_clause 0x1 4415; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4416; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4417; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4418; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4419; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4420; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4421; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4422; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4423; GFX10-CU-NEXT: buffer_gl0_inv 4424; GFX10-CU-NEXT: buffer_gl1_inv 4425; GFX10-CU-NEXT: s_endpgm 4426; 4427; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg: 4428; SKIP-CACHE-INV: ; %bb.0: ; %entry 4429; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4430; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4431; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4432; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4433; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4434; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4435; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4436; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4437; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4438; SKIP-CACHE-INV-NEXT: s_endpgm 4439 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4440entry: 4441 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4442 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire 
acquire 4443 ret void 4444} 4445 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), release/acquire: the checks expect a wait before the cmpswap and a wait plus cache invalidate after it; SKIP-CACHE-INV keeps only the waits. 4446define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( 4447; GFX6-LABEL: global_system_one_as_release_acquire_cmpxchg: 4448; GFX6: ; %bb.0: ; %entry 4449; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4450; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4451; GFX6-NEXT: s_mov_b32 s7, 0xf000 4452; GFX6-NEXT: s_mov_b32 s6, -1 4453; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4454; GFX6-NEXT: v_mov_b32_e32 v0, s0 4455; GFX6-NEXT: v_mov_b32_e32 v1, s1 4456; GFX6-NEXT: s_waitcnt vmcnt(0) 4457; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4458; GFX6-NEXT: s_waitcnt vmcnt(0) 4459; GFX6-NEXT: buffer_wbinvl1 4460; GFX6-NEXT: s_endpgm 4461; 4462; GFX7-LABEL: global_system_one_as_release_acquire_cmpxchg: 4463; GFX7: ; %bb.0: ; %entry 4464; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4465; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4466; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4467; GFX7-NEXT: s_add_u32 s0, s0, 16 4468; GFX7-NEXT: s_addc_u32 s1, s1, 0 4469; GFX7-NEXT: v_mov_b32_e32 v0, s0 4470; GFX7-NEXT: v_mov_b32_e32 v2, s2 4471; GFX7-NEXT: v_mov_b32_e32 v1, s1 4472; GFX7-NEXT: v_mov_b32_e32 v3, s3 4473; GFX7-NEXT: s_waitcnt vmcnt(0) 4474; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4475; GFX7-NEXT: s_waitcnt vmcnt(0) 4476; GFX7-NEXT: buffer_wbinvl1_vol 4477; GFX7-NEXT: s_endpgm 4478; 4479; GFX10-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg: 4480; GFX10-WGP: ; %bb.0: ; %entry 4481; GFX10-WGP-NEXT: s_clause 0x1 4482; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4483; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4484; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4485; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4486; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4487; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4488; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4489; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4490; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4491; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4492; 
GFX10-WGP-NEXT: buffer_gl0_inv 4493; GFX10-WGP-NEXT: buffer_gl1_inv 4494; GFX10-WGP-NEXT: s_endpgm 4495; 4496; GFX10-CU-LABEL: global_system_one_as_release_acquire_cmpxchg: 4497; GFX10-CU: ; %bb.0: ; %entry 4498; GFX10-CU-NEXT: s_clause 0x1 4499; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4500; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4501; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4502; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4503; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4504; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4505; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4506; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4507; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4508; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4509; GFX10-CU-NEXT: buffer_gl0_inv 4510; GFX10-CU-NEXT: buffer_gl1_inv 4511; GFX10-CU-NEXT: s_endpgm 4512; 4513; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg: 4514; SKIP-CACHE-INV: ; %bb.0: ; %entry 4515; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4516; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4517; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4518; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4519; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4520; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4521; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4522; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4523; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4524; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4525; SKIP-CACHE-INV-NEXT: s_endpgm 4526 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4527entry: 4528 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4529 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire 4530 ret void 4531} 4532 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), acq_rel/acquire: the checks expect a wait before the cmpswap and a wait plus cache invalidate after it; SKIP-CACHE-INV keeps only the waits. 4533define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( 4534; GFX6-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 4535; GFX6: ; %bb.0: ; %entry 4536; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 
4537; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4538; GFX6-NEXT: s_mov_b32 s7, 0xf000 4539; GFX6-NEXT: s_mov_b32 s6, -1 4540; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4541; GFX6-NEXT: v_mov_b32_e32 v0, s0 4542; GFX6-NEXT: v_mov_b32_e32 v1, s1 4543; GFX6-NEXT: s_waitcnt vmcnt(0) 4544; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4545; GFX6-NEXT: s_waitcnt vmcnt(0) 4546; GFX6-NEXT: buffer_wbinvl1 4547; GFX6-NEXT: s_endpgm 4548; 4549; GFX7-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 4550; GFX7: ; %bb.0: ; %entry 4551; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4552; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4553; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4554; GFX7-NEXT: s_add_u32 s0, s0, 16 4555; GFX7-NEXT: s_addc_u32 s1, s1, 0 4556; GFX7-NEXT: v_mov_b32_e32 v0, s0 4557; GFX7-NEXT: v_mov_b32_e32 v2, s2 4558; GFX7-NEXT: v_mov_b32_e32 v1, s1 4559; GFX7-NEXT: v_mov_b32_e32 v3, s3 4560; GFX7-NEXT: s_waitcnt vmcnt(0) 4561; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4562; GFX7-NEXT: s_waitcnt vmcnt(0) 4563; GFX7-NEXT: buffer_wbinvl1_vol 4564; GFX7-NEXT: s_endpgm 4565; 4566; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 4567; GFX10-WGP: ; %bb.0: ; %entry 4568; GFX10-WGP-NEXT: s_clause 0x1 4569; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4570; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4571; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4572; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4573; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4574; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4575; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4576; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4577; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4578; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4579; GFX10-WGP-NEXT: buffer_gl0_inv 4580; GFX10-WGP-NEXT: buffer_gl1_inv 4581; GFX10-WGP-NEXT: s_endpgm 4582; 4583; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 4584; GFX10-CU: ; %bb.0: ; %entry 4585; GFX10-CU-NEXT: s_clause 0x1 4586; 
GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4587; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4588; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4589; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4590; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4591; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4592; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4593; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4594; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4595; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4596; GFX10-CU-NEXT: buffer_gl0_inv 4597; GFX10-CU-NEXT: buffer_gl1_inv 4598; GFX10-CU-NEXT: s_endpgm 4599; 4600; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: 4601; SKIP-CACHE-INV: ; %bb.0: ; %entry 4602; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4603; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4604; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4605; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4606; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4607; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4608; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4609; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4610; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4611; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4612; SKIP-CACHE-INV-NEXT: s_endpgm 4613 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4614entry: 4615 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4616 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 4617 ret void 4618} 4619 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), seq_cst/acquire: the checks expect a wait before the cmpswap and a wait plus cache invalidate after it; SKIP-CACHE-INV keeps only the waits. 4620define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( 4621; GFX6-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 4622; GFX6: ; %bb.0: ; %entry 4623; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4624; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4625; GFX6-NEXT: s_mov_b32 s7, 0xf000 4626; GFX6-NEXT: s_mov_b32 s6, -1 4627; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4628; GFX6-NEXT: v_mov_b32_e32 v0, s0 4629; GFX6-NEXT: v_mov_b32_e32 v1, s1 4630; 
GFX6-NEXT: s_waitcnt vmcnt(0) 4631; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4632; GFX6-NEXT: s_waitcnt vmcnt(0) 4633; GFX6-NEXT: buffer_wbinvl1 4634; GFX6-NEXT: s_endpgm 4635; 4636; GFX7-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 4637; GFX7: ; %bb.0: ; %entry 4638; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4639; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4640; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4641; GFX7-NEXT: s_add_u32 s0, s0, 16 4642; GFX7-NEXT: s_addc_u32 s1, s1, 0 4643; GFX7-NEXT: v_mov_b32_e32 v0, s0 4644; GFX7-NEXT: v_mov_b32_e32 v2, s2 4645; GFX7-NEXT: v_mov_b32_e32 v1, s1 4646; GFX7-NEXT: v_mov_b32_e32 v3, s3 4647; GFX7-NEXT: s_waitcnt vmcnt(0) 4648; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4649; GFX7-NEXT: s_waitcnt vmcnt(0) 4650; GFX7-NEXT: buffer_wbinvl1_vol 4651; GFX7-NEXT: s_endpgm 4652; 4653; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 4654; GFX10-WGP: ; %bb.0: ; %entry 4655; GFX10-WGP-NEXT: s_clause 0x1 4656; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4657; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4658; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4659; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4660; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4661; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4662; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4663; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4664; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4665; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4666; GFX10-WGP-NEXT: buffer_gl0_inv 4667; GFX10-WGP-NEXT: buffer_gl1_inv 4668; GFX10-WGP-NEXT: s_endpgm 4669; 4670; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 4671; GFX10-CU: ; %bb.0: ; %entry 4672; GFX10-CU-NEXT: s_clause 0x1 4673; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4674; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4675; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4676; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4677; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4678; GFX10-CU-NEXT: 
v_mov_b32_e32 v1, s1 4679; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4680; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4681; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4682; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4683; GFX10-CU-NEXT: buffer_gl0_inv 4684; GFX10-CU-NEXT: buffer_gl1_inv 4685; GFX10-CU-NEXT: s_endpgm 4686; 4687; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: 4688; SKIP-CACHE-INV: ; %bb.0: ; %entry 4689; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4690; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4691; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4692; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4693; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4694; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4695; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4696; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4697; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4698; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4699; SKIP-CACHE-INV-NEXT: s_endpgm 4700 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4701entry: 4702 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4703 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 4704 ret void 4705} 4706 ; Non-returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), seq_cst/seq_cst: the checks expect a wait before the cmpswap and a wait plus cache invalidate after it; SKIP-CACHE-INV keeps only the waits. 4707define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( 4708; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 4709; GFX6: ; %bb.0: ; %entry 4710; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4711; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4712; GFX6-NEXT: s_mov_b32 s7, 0xf000 4713; GFX6-NEXT: s_mov_b32 s6, -1 4714; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4715; GFX6-NEXT: v_mov_b32_e32 v0, s0 4716; GFX6-NEXT: v_mov_b32_e32 v1, s1 4717; GFX6-NEXT: s_waitcnt vmcnt(0) 4718; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4719; GFX6-NEXT: s_waitcnt vmcnt(0) 4720; GFX6-NEXT: buffer_wbinvl1 4721; GFX6-NEXT: s_endpgm 4722; 4723; GFX7-LABEL: 
global_system_one_as_seq_cst_seq_cst_cmpxchg: 4724; GFX7: ; %bb.0: ; %entry 4725; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4726; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4727; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4728; GFX7-NEXT: s_add_u32 s0, s0, 16 4729; GFX7-NEXT: s_addc_u32 s1, s1, 0 4730; GFX7-NEXT: v_mov_b32_e32 v0, s0 4731; GFX7-NEXT: v_mov_b32_e32 v2, s2 4732; GFX7-NEXT: v_mov_b32_e32 v1, s1 4733; GFX7-NEXT: v_mov_b32_e32 v3, s3 4734; GFX7-NEXT: s_waitcnt vmcnt(0) 4735; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] 4736; GFX7-NEXT: s_waitcnt vmcnt(0) 4737; GFX7-NEXT: buffer_wbinvl1_vol 4738; GFX7-NEXT: s_endpgm 4739; 4740; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 4741; GFX10-WGP: ; %bb.0: ; %entry 4742; GFX10-WGP-NEXT: s_clause 0x1 4743; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4744; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4745; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4746; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4747; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4748; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4749; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4750; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4751; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4752; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4753; GFX10-WGP-NEXT: buffer_gl0_inv 4754; GFX10-WGP-NEXT: buffer_gl1_inv 4755; GFX10-WGP-NEXT: s_endpgm 4756; 4757; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 4758; GFX10-CU: ; %bb.0: ; %entry 4759; GFX10-CU-NEXT: s_clause 0x1 4760; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4761; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4762; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4763; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4764; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4765; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4766; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4767; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4768; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[2:3] offset:16 4769; GFX10-CU-NEXT: s_waitcnt_vscnt null, 
0x0 4770; GFX10-CU-NEXT: buffer_gl0_inv 4771; GFX10-CU-NEXT: buffer_gl1_inv 4772; GFX10-CU-NEXT: s_endpgm 4773; 4774; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: 4775; SKIP-CACHE-INV: ; %bb.0: ; %entry 4776; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4777; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4778; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4779; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4780; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4781; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4782; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4783; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4784; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 4785; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4786; SKIP-CACHE-INV-NEXT: s_endpgm 4787 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4788entry: 4789 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4790 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 4791 ret void 4792} 4793 ; Returning cmpxchg at %out + 4 x i32 (byte offset 16 in the checks), syncscope("one-as"), acquire/monotonic; field 0 of the cmpxchg result is stored to %out. The checks expect glc on the cmpswap, then s_waitcnt vmcnt(0) and a cache invalidate before the store; SKIP-CACHE-INV keeps only the wait. 4794define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( 4795; GFX6-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 4796; GFX6: ; %bb.0: ; %entry 4797; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4798; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4799; GFX6-NEXT: s_mov_b32 s7, 0xf000 4800; GFX6-NEXT: s_mov_b32 s6, -1 4801; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4802; GFX6-NEXT: v_mov_b32_e32 v0, s0 4803; GFX6-NEXT: v_mov_b32_e32 v1, s1 4804; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 4805; GFX6-NEXT: s_waitcnt vmcnt(0) 4806; GFX6-NEXT: buffer_wbinvl1 4807; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 4808; GFX6-NEXT: s_endpgm 4809; 4810; GFX7-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 4811; GFX7: ; %bb.0: ; %entry 4812; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4813; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4814; GFX7-NEXT: s_waitcnt lgkmcnt(0) 
4815; GFX7-NEXT: s_add_u32 s4, s0, 16 4816; GFX7-NEXT: s_addc_u32 s5, s1, 0 4817; GFX7-NEXT: v_mov_b32_e32 v0, s4 4818; GFX7-NEXT: v_mov_b32_e32 v2, s2 4819; GFX7-NEXT: v_mov_b32_e32 v1, s5 4820; GFX7-NEXT: v_mov_b32_e32 v3, s3 4821; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 4822; GFX7-NEXT: s_waitcnt vmcnt(0) 4823; GFX7-NEXT: buffer_wbinvl1_vol 4824; GFX7-NEXT: v_mov_b32_e32 v0, s0 4825; GFX7-NEXT: v_mov_b32_e32 v1, s1 4826; GFX7-NEXT: flat_store_dword v[0:1], v2 4827; GFX7-NEXT: s_endpgm 4828; 4829; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 4830; GFX10-WGP: ; %bb.0: ; %entry 4831; GFX10-WGP-NEXT: s_clause 0x1 4832; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4833; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4834; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4835; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4836; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4837; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4838; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 4839; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4840; GFX10-WGP-NEXT: buffer_gl0_inv 4841; GFX10-WGP-NEXT: buffer_gl1_inv 4842; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 4843; GFX10-WGP-NEXT: s_endpgm 4844; 4845; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: 4846; GFX10-CU: ; %bb.0: ; %entry 4847; GFX10-CU-NEXT: s_clause 0x1 4848; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4849; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4850; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4851; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4852; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4853; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4854; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 4855; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4856; GFX10-CU-NEXT: buffer_gl0_inv 4857; GFX10-CU-NEXT: buffer_gl1_inv 4858; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 4859; GFX10-CU-NEXT: s_endpgm 4860; 4861; SKIP-CACHE-INV-LABEL: 
global_system_one_as_acquire_monotonic_ret_cmpxchg: 4862; SKIP-CACHE-INV: ; %bb.0: ; %entry 4863; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4864; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4865; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4866; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4867; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4868; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4869; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4870; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 4871; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4872; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 4873; SKIP-CACHE-INV-NEXT: s_endpgm 4874 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4875entry: 4876 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4877 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 4878 %val0 = extractvalue { i32, i1 } %val, 0 4879 store i32 %val0, i32 addrspace(1)* %out, align 4 4880 ret void 4881} 4882 4883define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( 4884; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4885; GFX6: ; %bb.0: ; %entry 4886; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4887; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4888; GFX6-NEXT: s_mov_b32 s7, 0xf000 4889; GFX6-NEXT: s_mov_b32 s6, -1 4890; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4891; GFX6-NEXT: v_mov_b32_e32 v0, s0 4892; GFX6-NEXT: v_mov_b32_e32 v1, s1 4893; GFX6-NEXT: s_waitcnt vmcnt(0) 4894; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 4895; GFX6-NEXT: s_waitcnt vmcnt(0) 4896; GFX6-NEXT: buffer_wbinvl1 4897; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 4898; GFX6-NEXT: s_endpgm 4899; 4900; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4901; GFX7: ; %bb.0: ; %entry 4902; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4903; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 4904; 
GFX7-NEXT: s_waitcnt lgkmcnt(0) 4905; GFX7-NEXT: s_add_u32 s4, s0, 16 4906; GFX7-NEXT: s_addc_u32 s5, s1, 0 4907; GFX7-NEXT: v_mov_b32_e32 v0, s4 4908; GFX7-NEXT: v_mov_b32_e32 v2, s2 4909; GFX7-NEXT: v_mov_b32_e32 v1, s5 4910; GFX7-NEXT: v_mov_b32_e32 v3, s3 4911; GFX7-NEXT: s_waitcnt vmcnt(0) 4912; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 4913; GFX7-NEXT: s_waitcnt vmcnt(0) 4914; GFX7-NEXT: buffer_wbinvl1_vol 4915; GFX7-NEXT: v_mov_b32_e32 v0, s0 4916; GFX7-NEXT: v_mov_b32_e32 v1, s1 4917; GFX7-NEXT: flat_store_dword v[0:1], v2 4918; GFX7-NEXT: s_endpgm 4919; 4920; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4921; GFX10-WGP: ; %bb.0: ; %entry 4922; GFX10-WGP-NEXT: s_clause 0x1 4923; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4924; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4925; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 4926; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4927; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4928; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 4929; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4930; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 4931; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 4932; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 4933; GFX10-WGP-NEXT: buffer_gl0_inv 4934; GFX10-WGP-NEXT: buffer_gl1_inv 4935; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 4936; GFX10-WGP-NEXT: s_endpgm 4937; 4938; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4939; GFX10-CU: ; %bb.0: ; %entry 4940; GFX10-CU-NEXT: s_clause 0x1 4941; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 4942; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 4943; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 4944; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4945; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4946; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 4947; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 4948; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 4949; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 4950; GFX10-CU-NEXT: 
s_waitcnt vmcnt(0) 4951; GFX10-CU-NEXT: buffer_gl0_inv 4952; GFX10-CU-NEXT: buffer_gl1_inv 4953; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 4954; GFX10-CU-NEXT: s_endpgm 4955; 4956; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4957; SKIP-CACHE-INV: ; %bb.0: ; %entry 4958; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4959; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4960; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 4961; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 4962; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4963; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 4964; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 4965; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4966; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 4967; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 4968; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 4969; SKIP-CACHE-INV-NEXT: s_endpgm 4970 i32 addrspace(1)* %out, i32 %in, i32 %old) { 4971entry: 4972 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 4973 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 4974 %val0 = extractvalue { i32, i1 } %val, 0 4975 store i32 %val0, i32 addrspace(1)* %out, align 4 4976 ret void 4977} 4978 4979define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( 4980; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4981; GFX6: ; %bb.0: ; %entry 4982; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 4983; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 4984; GFX6-NEXT: s_mov_b32 s7, 0xf000 4985; GFX6-NEXT: s_mov_b32 s6, -1 4986; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4987; GFX6-NEXT: v_mov_b32_e32 v0, s0 4988; GFX6-NEXT: v_mov_b32_e32 v1, s1 4989; GFX6-NEXT: s_waitcnt vmcnt(0) 4990; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 4991; GFX6-NEXT: s_waitcnt vmcnt(0) 4992; GFX6-NEXT: buffer_wbinvl1 4993; GFX6-NEXT: buffer_store_dword v0, off, 
s[4:7], 0 4994; GFX6-NEXT: s_endpgm 4995; 4996; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4997; GFX7: ; %bb.0: ; %entry 4998; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 4999; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5000; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5001; GFX7-NEXT: s_add_u32 s4, s0, 16 5002; GFX7-NEXT: s_addc_u32 s5, s1, 0 5003; GFX7-NEXT: v_mov_b32_e32 v0, s4 5004; GFX7-NEXT: v_mov_b32_e32 v2, s2 5005; GFX7-NEXT: v_mov_b32_e32 v1, s5 5006; GFX7-NEXT: v_mov_b32_e32 v3, s3 5007; GFX7-NEXT: s_waitcnt vmcnt(0) 5008; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5009; GFX7-NEXT: s_waitcnt vmcnt(0) 5010; GFX7-NEXT: buffer_wbinvl1_vol 5011; GFX7-NEXT: v_mov_b32_e32 v0, s0 5012; GFX7-NEXT: v_mov_b32_e32 v1, s1 5013; GFX7-NEXT: flat_store_dword v[0:1], v2 5014; GFX7-NEXT: s_endpgm 5015; 5016; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 5017; GFX10-WGP: ; %bb.0: ; %entry 5018; GFX10-WGP-NEXT: s_clause 0x1 5019; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5020; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5021; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5022; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5023; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5024; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5025; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5026; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5027; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5028; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5029; GFX10-WGP-NEXT: buffer_gl0_inv 5030; GFX10-WGP-NEXT: buffer_gl1_inv 5031; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 5032; GFX10-WGP-NEXT: s_endpgm 5033; 5034; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 5035; GFX10-CU: ; %bb.0: ; %entry 5036; GFX10-CU-NEXT: s_clause 0x1 5037; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5038; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5039; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5040; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5041; GFX10-CU-NEXT: 
v_mov_b32_e32 v0, s0 5042; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5043; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5044; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5045; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5046; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5047; GFX10-CU-NEXT: buffer_gl0_inv 5048; GFX10-CU-NEXT: buffer_gl1_inv 5049; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 5050; GFX10-CU-NEXT: s_endpgm 5051; 5052; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: 5053; SKIP-CACHE-INV: ; %bb.0: ; %entry 5054; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5055; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5056; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5057; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5058; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5059; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5060; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5061; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5062; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5063; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5064; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5065; SKIP-CACHE-INV-NEXT: s_endpgm 5066 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5067entry: 5068 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5069 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 5070 %val0 = extractvalue { i32, i1 } %val, 0 5071 store i32 %val0, i32 addrspace(1)* %out, align 4 5072 ret void 5073} 5074 5075define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( 5076; GFX6-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 5077; GFX6: ; %bb.0: ; %entry 5078; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5079; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5080; GFX6-NEXT: s_mov_b32 s7, 0xf000 5081; GFX6-NEXT: s_mov_b32 s6, -1 5082; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5083; GFX6-NEXT: v_mov_b32_e32 v0, s0 5084; GFX6-NEXT: 
v_mov_b32_e32 v1, s1 5085; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5086; GFX6-NEXT: s_waitcnt vmcnt(0) 5087; GFX6-NEXT: buffer_wbinvl1 5088; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5089; GFX6-NEXT: s_endpgm 5090; 5091; GFX7-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 5092; GFX7: ; %bb.0: ; %entry 5093; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5094; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5095; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5096; GFX7-NEXT: s_add_u32 s4, s0, 16 5097; GFX7-NEXT: s_addc_u32 s5, s1, 0 5098; GFX7-NEXT: v_mov_b32_e32 v0, s4 5099; GFX7-NEXT: v_mov_b32_e32 v2, s2 5100; GFX7-NEXT: v_mov_b32_e32 v1, s5 5101; GFX7-NEXT: v_mov_b32_e32 v3, s3 5102; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5103; GFX7-NEXT: s_waitcnt vmcnt(0) 5104; GFX7-NEXT: buffer_wbinvl1_vol 5105; GFX7-NEXT: v_mov_b32_e32 v0, s0 5106; GFX7-NEXT: v_mov_b32_e32 v1, s1 5107; GFX7-NEXT: flat_store_dword v[0:1], v2 5108; GFX7-NEXT: s_endpgm 5109; 5110; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 5111; GFX10-WGP: ; %bb.0: ; %entry 5112; GFX10-WGP-NEXT: s_clause 0x1 5113; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5114; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5115; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5116; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5117; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5118; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5119; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5120; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5121; GFX10-WGP-NEXT: buffer_gl0_inv 5122; GFX10-WGP-NEXT: buffer_gl1_inv 5123; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 5124; GFX10-WGP-NEXT: s_endpgm 5125; 5126; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 5127; GFX10-CU: ; %bb.0: ; %entry 5128; GFX10-CU-NEXT: s_clause 0x1 5129; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5130; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5131; GFX10-CU-NEXT: 
v_mov_b32_e32 v2, 0 5132; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5133; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5134; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5135; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5136; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5137; GFX10-CU-NEXT: buffer_gl0_inv 5138; GFX10-CU-NEXT: buffer_gl1_inv 5139; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 5140; GFX10-CU-NEXT: s_endpgm 5141; 5142; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: 5143; SKIP-CACHE-INV: ; %bb.0: ; %entry 5144; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5145; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5146; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5147; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5148; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5149; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5150; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5151; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5152; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5153; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5154; SKIP-CACHE-INV-NEXT: s_endpgm 5155 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5156entry: 5157 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5158 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 5159 %val0 = extractvalue { i32, i1 } %val, 0 5160 store i32 %val0, i32 addrspace(1)* %out, align 4 5161 ret void 5162} 5163 5164define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( 5165; GFX6-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 5166; GFX6: ; %bb.0: ; %entry 5167; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5168; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5169; GFX6-NEXT: s_mov_b32 s7, 0xf000 5170; GFX6-NEXT: s_mov_b32 s6, -1 5171; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5172; GFX6-NEXT: v_mov_b32_e32 v0, s0 5173; GFX6-NEXT: v_mov_b32_e32 v1, s1 5174; GFX6-NEXT: s_waitcnt vmcnt(0) 
5175; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5176; GFX6-NEXT: s_waitcnt vmcnt(0) 5177; GFX6-NEXT: buffer_wbinvl1 5178; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5179; GFX6-NEXT: s_endpgm 5180; 5181; GFX7-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 5182; GFX7: ; %bb.0: ; %entry 5183; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5184; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5185; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5186; GFX7-NEXT: s_add_u32 s4, s0, 16 5187; GFX7-NEXT: s_addc_u32 s5, s1, 0 5188; GFX7-NEXT: v_mov_b32_e32 v0, s4 5189; GFX7-NEXT: v_mov_b32_e32 v2, s2 5190; GFX7-NEXT: v_mov_b32_e32 v1, s5 5191; GFX7-NEXT: v_mov_b32_e32 v3, s3 5192; GFX7-NEXT: s_waitcnt vmcnt(0) 5193; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5194; GFX7-NEXT: s_waitcnt vmcnt(0) 5195; GFX7-NEXT: buffer_wbinvl1_vol 5196; GFX7-NEXT: v_mov_b32_e32 v0, s0 5197; GFX7-NEXT: v_mov_b32_e32 v1, s1 5198; GFX7-NEXT: flat_store_dword v[0:1], v2 5199; GFX7-NEXT: s_endpgm 5200; 5201; GFX10-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 5202; GFX10-WGP: ; %bb.0: ; %entry 5203; GFX10-WGP-NEXT: s_clause 0x1 5204; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5205; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5206; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5207; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5208; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5209; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5210; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5211; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5212; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5213; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5214; GFX10-WGP-NEXT: buffer_gl0_inv 5215; GFX10-WGP-NEXT: buffer_gl1_inv 5216; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 5217; GFX10-WGP-NEXT: s_endpgm 5218; 5219; GFX10-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 5220; GFX10-CU: ; %bb.0: ; %entry 5221; GFX10-CU-NEXT: s_clause 0x1 5222; GFX10-CU-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x8 5223; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5224; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5225; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5226; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5227; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5228; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5229; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5230; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5231; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5232; GFX10-CU-NEXT: buffer_gl0_inv 5233; GFX10-CU-NEXT: buffer_gl1_inv 5234; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 5235; GFX10-CU-NEXT: s_endpgm 5236; 5237; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: 5238; SKIP-CACHE-INV: ; %bb.0: ; %entry 5239; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5240; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5241; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5242; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5243; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5244; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5245; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5246; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5247; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5248; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5249; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5250; SKIP-CACHE-INV-NEXT: s_endpgm 5251 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5252entry: 5253 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5254 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire 5255 %val0 = extractvalue { i32, i1 } %val, 0 5256 store i32 %val0, i32 addrspace(1)* %out, align 4 5257 ret void 5258} 5259 5260define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( 5261; GFX6-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 5262; GFX6: ; %bb.0: ; %entry 5263; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5264; GFX6-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb 5265; GFX6-NEXT: s_mov_b32 s7, 0xf000 5266; GFX6-NEXT: s_mov_b32 s6, -1 5267; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5268; GFX6-NEXT: v_mov_b32_e32 v0, s0 5269; GFX6-NEXT: v_mov_b32_e32 v1, s1 5270; GFX6-NEXT: s_waitcnt vmcnt(0) 5271; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5272; GFX6-NEXT: s_waitcnt vmcnt(0) 5273; GFX6-NEXT: buffer_wbinvl1 5274; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5275; GFX6-NEXT: s_endpgm 5276; 5277; GFX7-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 5278; GFX7: ; %bb.0: ; %entry 5279; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5280; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5281; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5282; GFX7-NEXT: s_add_u32 s4, s0, 16 5283; GFX7-NEXT: s_addc_u32 s5, s1, 0 5284; GFX7-NEXT: v_mov_b32_e32 v0, s4 5285; GFX7-NEXT: v_mov_b32_e32 v2, s2 5286; GFX7-NEXT: v_mov_b32_e32 v1, s5 5287; GFX7-NEXT: v_mov_b32_e32 v3, s3 5288; GFX7-NEXT: s_waitcnt vmcnt(0) 5289; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5290; GFX7-NEXT: s_waitcnt vmcnt(0) 5291; GFX7-NEXT: buffer_wbinvl1_vol 5292; GFX7-NEXT: v_mov_b32_e32 v0, s0 5293; GFX7-NEXT: v_mov_b32_e32 v1, s1 5294; GFX7-NEXT: flat_store_dword v[0:1], v2 5295; GFX7-NEXT: s_endpgm 5296; 5297; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 5298; GFX10-WGP: ; %bb.0: ; %entry 5299; GFX10-WGP-NEXT: s_clause 0x1 5300; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5301; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5302; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5303; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5304; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5305; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5306; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5307; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5308; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5309; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5310; GFX10-WGP-NEXT: buffer_gl0_inv 5311; GFX10-WGP-NEXT: buffer_gl1_inv 5312; 
GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 5313; GFX10-WGP-NEXT: s_endpgm 5314; 5315; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 5316; GFX10-CU: ; %bb.0: ; %entry 5317; GFX10-CU-NEXT: s_clause 0x1 5318; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5319; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5320; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5321; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5322; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5323; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5324; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5325; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5326; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5327; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5328; GFX10-CU-NEXT: buffer_gl0_inv 5329; GFX10-CU-NEXT: buffer_gl1_inv 5330; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 5331; GFX10-CU-NEXT: s_endpgm 5332; 5333; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: 5334; SKIP-CACHE-INV: ; %bb.0: ; %entry 5335; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5336; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5337; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5338; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5339; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5340; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5341; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5342; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5343; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5344; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5345; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5346; SKIP-CACHE-INV-NEXT: s_endpgm 5347 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5348entry: 5349 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5350 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 5351 %val0 = extractvalue { i32, i1 } %val, 0 5352 store i32 %val0, i32 addrspace(1)* %out, align 4 5353 ret void 5354} 5355 5356define 
amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( 5357; GFX6-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 5358; GFX6: ; %bb.0: ; %entry 5359; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5360; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5361; GFX6-NEXT: s_mov_b32 s7, 0xf000 5362; GFX6-NEXT: s_mov_b32 s6, -1 5363; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5364; GFX6-NEXT: v_mov_b32_e32 v0, s0 5365; GFX6-NEXT: v_mov_b32_e32 v1, s1 5366; GFX6-NEXT: s_waitcnt vmcnt(0) 5367; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5368; GFX6-NEXT: s_waitcnt vmcnt(0) 5369; GFX6-NEXT: buffer_wbinvl1 5370; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5371; GFX6-NEXT: s_endpgm 5372; 5373; GFX7-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 5374; GFX7: ; %bb.0: ; %entry 5375; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5376; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5377; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5378; GFX7-NEXT: s_add_u32 s4, s0, 16 5379; GFX7-NEXT: s_addc_u32 s5, s1, 0 5380; GFX7-NEXT: v_mov_b32_e32 v0, s4 5381; GFX7-NEXT: v_mov_b32_e32 v2, s2 5382; GFX7-NEXT: v_mov_b32_e32 v1, s5 5383; GFX7-NEXT: v_mov_b32_e32 v3, s3 5384; GFX7-NEXT: s_waitcnt vmcnt(0) 5385; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5386; GFX7-NEXT: s_waitcnt vmcnt(0) 5387; GFX7-NEXT: buffer_wbinvl1_vol 5388; GFX7-NEXT: v_mov_b32_e32 v0, s0 5389; GFX7-NEXT: v_mov_b32_e32 v1, s1 5390; GFX7-NEXT: flat_store_dword v[0:1], v2 5391; GFX7-NEXT: s_endpgm 5392; 5393; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 5394; GFX10-WGP: ; %bb.0: ; %entry 5395; GFX10-WGP-NEXT: s_clause 0x1 5396; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5397; GFX10-WGP-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5398; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5399; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5400; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5401; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5402; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5403; 
GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5404; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5405; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5406; GFX10-WGP-NEXT: buffer_gl0_inv 5407; GFX10-WGP-NEXT: buffer_gl1_inv 5408; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 5409; GFX10-WGP-NEXT: s_endpgm 5410; 5411; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 5412; GFX10-CU: ; %bb.0: ; %entry 5413; GFX10-CU-NEXT: s_clause 0x1 5414; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5415; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5416; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5417; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5418; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5419; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5420; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5421; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5422; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5423; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5424; GFX10-CU-NEXT: buffer_gl0_inv 5425; GFX10-CU-NEXT: buffer_gl1_inv 5426; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 5427; GFX10-CU-NEXT: s_endpgm 5428; 5429; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: 5430; SKIP-CACHE-INV: ; %bb.0: ; %entry 5431; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5432; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5433; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5434; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5435; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5436; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5437; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5438; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5439; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5440; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5441; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5442; SKIP-CACHE-INV-NEXT: s_endpgm 5443 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5444entry: 5445 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 
5446 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 5447 %val0 = extractvalue { i32, i1 } %val, 0 5448 store i32 %val0, i32 addrspace(1)* %out, align 4 5449 ret void 5450} 5451 5452define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( 5453; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 5454; GFX6: ; %bb.0: ; %entry 5455; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5456; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5457; GFX6-NEXT: s_mov_b32 s7, 0xf000 5458; GFX6-NEXT: s_mov_b32 s6, -1 5459; GFX6-NEXT: s_waitcnt lgkmcnt(0) 5460; GFX6-NEXT: v_mov_b32_e32 v0, s0 5461; GFX6-NEXT: v_mov_b32_e32 v1, s1 5462; GFX6-NEXT: s_waitcnt vmcnt(0) 5463; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5464; GFX6-NEXT: s_waitcnt vmcnt(0) 5465; GFX6-NEXT: buffer_wbinvl1 5466; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 5467; GFX6-NEXT: s_endpgm 5468; 5469; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 5470; GFX7: ; %bb.0: ; %entry 5471; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 5472; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x2 5473; GFX7-NEXT: s_waitcnt lgkmcnt(0) 5474; GFX7-NEXT: s_add_u32 s4, s0, 16 5475; GFX7-NEXT: s_addc_u32 s5, s1, 0 5476; GFX7-NEXT: v_mov_b32_e32 v0, s4 5477; GFX7-NEXT: v_mov_b32_e32 v2, s2 5478; GFX7-NEXT: v_mov_b32_e32 v1, s5 5479; GFX7-NEXT: v_mov_b32_e32 v3, s3 5480; GFX7-NEXT: s_waitcnt vmcnt(0) 5481; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc 5482; GFX7-NEXT: s_waitcnt vmcnt(0) 5483; GFX7-NEXT: buffer_wbinvl1_vol 5484; GFX7-NEXT: v_mov_b32_e32 v0, s0 5485; GFX7-NEXT: v_mov_b32_e32 v1, s1 5486; GFX7-NEXT: flat_store_dword v[0:1], v2 5487; GFX7-NEXT: s_endpgm 5488; 5489; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 5490; GFX10-WGP: ; %bb.0: ; %entry 5491; GFX10-WGP-NEXT: s_clause 0x1 5492; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5493; GFX10-WGP-NEXT: s_load_dwordx2 
s[2:3], s[4:5], 0x0 5494; GFX10-WGP-NEXT: v_mov_b32_e32 v2, 0 5495; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 5496; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 5497; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 5498; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5499; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 5500; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5501; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) 5502; GFX10-WGP-NEXT: buffer_gl0_inv 5503; GFX10-WGP-NEXT: buffer_gl1_inv 5504; GFX10-WGP-NEXT: global_store_dword v2, v0, s[2:3] 5505; GFX10-WGP-NEXT: s_endpgm 5506; 5507; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 5508; GFX10-CU: ; %bb.0: ; %entry 5509; GFX10-CU-NEXT: s_clause 0x1 5510; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 5511; GFX10-CU-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 5512; GFX10-CU-NEXT: v_mov_b32_e32 v2, 0 5513; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 5514; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 5515; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 5516; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5517; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 5518; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] offset:16 glc 5519; GFX10-CU-NEXT: s_waitcnt vmcnt(0) 5520; GFX10-CU-NEXT: buffer_gl0_inv 5521; GFX10-CU-NEXT: buffer_gl1_inv 5522; GFX10-CU-NEXT: global_store_dword v2, v0, s[2:3] 5523; GFX10-CU-NEXT: s_endpgm 5524; 5525; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 5526; SKIP-CACHE-INV: ; %bb.0: ; %entry 5527; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 5528; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 5529; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 5530; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 5531; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 5532; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 5533; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 5534; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) 5535; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc 5536; SKIP-CACHE-INV-NEXT: 
s_waitcnt vmcnt(0) 5537; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 5538; SKIP-CACHE-INV-NEXT: s_endpgm 5539 i32 addrspace(1)* %out, i32 %in, i32 %old) { 5540entry: 5541 %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 5542 %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 5543 %val0 = extractvalue { i32, i1 } %val, 0 5544 store i32 %val0, i32 addrspace(1)* %out, align 4 5545 ret void 5546} 5547 5548