; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s

; Code generation tests for atomic loads, atomic stores and atomicrmw xchg on
; addrspace(3) pointers. None of the atomic operations names a syncscope.
; Each llc invocation above compiles this file for one target configuration
; and matches the emitted instructions against its own check prefix.
; The check lines are generated by the script named in the first line;
; regenerate them with that script instead of editing them by hand.

; Atomic i32 load from %in with unordered ordering; the loaded value is
; written to %out with a non-atomic store.
define amdgpu_kernel void @local_system_unordered_load(
; GFX6-LABEL: local_system_unordered_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v1, v0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_unordered_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    ds_read_b32 v0, v0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_unordered_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    ds_read_b32 v0, v0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v1, v0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_unordered_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    ds_read_b32 v0, v0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v1, v0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_unordered_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in unordered, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic i32 load from %in with monotonic ordering; the loaded value is
; written to %out with a non-atomic store. The expected instructions are the
; same as for the unordered load.
define amdgpu_kernel void @local_system_monotonic_load(
; GFX6-LABEL: local_system_monotonic_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v1, v0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_monotonic_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    ds_read_b32 v0, v0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_monotonic_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    ds_read_b32 v0, v0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    ds_write_b32 v1, v0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_monotonic_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    ds_read_b32 v0, v0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    ds_write_b32 v1, v0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_monotonic_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in monotonic, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic i32 load from %in with acquire ordering, followed by a non-atomic
; store of the result to %out. Every configuration except the one run with
; -amdgcn-skip-cache-invalidations expects a cache invalidate instruction
; between the ds_read and the ds_write.
define amdgpu_kernel void @local_system_acquire_load(
; GFX6-LABEL: local_system_acquire_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_write_b32 v1, v0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_acquire_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    ds_read_b32 v0, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_acquire_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    ds_read_b32 v0, v0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    ds_write_b32 v1, v0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_acquire_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    ds_read_b32 v0, v0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    ds_write_b32 v1, v0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acquire_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in acquire, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic i32 load from %in with seq_cst ordering, followed by a non-atomic
; store of the result to %out. On top of what the acquire load expects, an
; s_waitcnt vmcnt(0) lgkmcnt(0) is expected ahead of the ds_read (followed by
; an s_waitcnt_vscnt on the gfx1010 configurations).
define amdgpu_kernel void @local_system_seq_cst_load(
; GFX6-LABEL: local_system_seq_cst_load:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_read_b32 v0, v0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_write_b32 v1, v0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_load:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_read_b32 v0, v0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_load:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_read_b32 v0, v0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    ds_write_b32 v1, v0
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_load:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    ds_read_b32 v0, v0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    ds_write_b32 v1, v0
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_load:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_read_b32 v0, v0
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v1, v0
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(3)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(3)* %out
  ret void
}

; Atomic i32 store of the scalar argument %in to %out with unordered
; ordering. Only the operand moves and a single ds_write are expected.
define amdgpu_kernel void @local_system_unordered_store(
; GFX6-LABEL: local_system_unordered_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_unordered_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_unordered_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_unordered_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_unordered_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out unordered, align 4
  ret void
}

; Atomic i32 store of the scalar argument %in to %out with monotonic
; ordering. The expected instructions are the same as for the unordered store.
define amdgpu_kernel void @local_system_monotonic_store(
; GFX6-LABEL: local_system_monotonic_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_monotonic_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_monotonic_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_monotonic_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_monotonic_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out monotonic, align 4
  ret void
}

; Atomic i32 store of the scalar argument %in to %out with release ordering.
; All configurations expect an s_waitcnt vmcnt(0) lgkmcnt(0) ahead of the
; ds_write; the gfx1010 configurations also expect an s_waitcnt_vscnt.
define amdgpu_kernel void @local_system_release_store(
; GFX6-LABEL: local_system_release_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_release_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_release_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_release_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_release_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out release, align 4
  ret void
}

; Atomic i32 store of the scalar argument %in to %out with seq_cst ordering.
; The expected instructions are the same as for the release store.
define amdgpu_kernel void @local_system_seq_cst_store(
; GFX6-LABEL: local_system_seq_cst_store:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_store:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s1
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_store:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_store:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s1
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s0
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_store:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 %in, i32 addrspace(3)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(3)* %out seq_cst, align 4
  ret void
}

; Volatile atomicrmw xchg of %in into %out with monotonic ordering; the
; returned value %val is not used. Nothing is expected between the operand
; moves and the ds_wrxchg_rtn_b32, or after it.
define amdgpu_kernel void @local_system_monotonic_atomicrmw(
; GFX6-LABEL: local_system_monotonic_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_monotonic_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_monotonic_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_monotonic_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_monotonic_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in monotonic
  ret void
}

; Volatile atomicrmw xchg of %in into %out with acquire ordering; %val is
; not used. A wait is expected after the exchange, followed by a cache
; invalidate on every configuration except the one run with
; -amdgcn-skip-cache-invalidations.
define amdgpu_kernel void @local_system_acquire_atomicrmw(
; GFX6-LABEL: local_system_acquire_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_acquire_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_acquire_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_acquire_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acquire_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acquire
  ret void
}

; Volatile atomicrmw xchg of %in into %out with release ordering; %val is
; not used. An s_waitcnt vmcnt(0) lgkmcnt(0) is expected ahead of the exchange
; (plus an s_waitcnt_vscnt on the gfx1010 configurations) and nothing after it.
define amdgpu_kernel void @local_system_release_atomicrmw(
; GFX6-LABEL: local_system_release_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_release_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_release_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_release_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_release_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in release
  ret void
}

; Volatile atomicrmw xchg of %in into %out with acq_rel ordering; %val is
; not used. The expectations combine the wait ahead of the exchange seen in
; the release case with the wait (and, unless skipped, cache invalidate) after
; it seen in the acquire case.
define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX6-LABEL: local_system_acq_rel_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_acq_rel_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_acq_rel_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_acq_rel_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acq_rel_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acq_rel
  ret void
}

; Volatile atomicrmw xchg of %in into %out with seq_cst ordering; %val is
; not used. The expected instructions are the same as for the acq_rel case.
define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX6-LABEL: local_system_seq_cst_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v0, v0, v1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in seq_cst
  ret void
}

; Volatile atomicrmw xchg of %in into %out with acquire ordering where the
; returned value is used: %val is written back to %out with a non-atomic
; store. The ds_write is expected after the post-exchange wait (and, unless
; skipped, the cache invalidate).
define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
; GFX6-LABEL: local_system_acquire_ret_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_acquire_ret_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_acquire_ret_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_acquire_ret_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT:    buffer_gl0_inv
; GFX10-CU-NEXT:    buffer_gl1_inv
; GFX10-CU-NEXT:    ds_write_b32 v0, v1
; GFX10-CU-NEXT:    s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acquire_ret_atomicrmw:
; SKIP-CACHE-INV:       ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
; SKIP-CACHE-INV-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT:    s_endpgm
    i32 addrspace(3)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acquire
  store i32 %val, i32 addrspace(3)* %out, align 4
  ret void
}

define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX6-LABEL: local_system_acq_rel_ret_atomicrmw:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s2
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_wbinvl1
; GFX6-NEXT:    ds_write_b32 v0, v1
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: local_system_acq_rel_ret_atomicrmw:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1_vol
; GFX7-NEXT:    ds_write_b32 v0, v1
; GFX7-NEXT:    s_endpgm
;
; GFX10-WGP-LABEL: local_system_acq_rel_ret_atomicrmw:
; GFX10-WGP:       ; %bb.0: ; %entry
; GFX10-WGP-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    ds_wrxchg_rtn_b32 v1, v0, v1
; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT:    buffer_gl0_inv
; GFX10-WGP-NEXT:    buffer_gl1_inv
; GFX10-WGP-NEXT:    ds_write_b32 v0, v1
; GFX10-WGP-NEXT:    s_endpgm
;
; GFX10-CU-LABEL: local_system_acq_rel_ret_atomicrmw:
; GFX10-CU:       ; %bb.0: ; %entry
; GFX10-CU-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT:
s_waitcnt vmcnt(0) lgkmcnt(0) 987; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 988; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 989; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 990; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 991; GFX10-CU-NEXT: buffer_gl0_inv 992; GFX10-CU-NEXT: buffer_gl1_inv 993; GFX10-CU-NEXT: ds_write_b32 v0, v1 994; GFX10-CU-NEXT: s_endpgm 995; 996; SKIP-CACHE-INV-LABEL: local_system_acq_rel_ret_atomicrmw: 997; SKIP-CACHE-INV: ; %bb.0: ; %entry 998; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 999; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1000; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1001; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1002; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 1003; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1004; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1005; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1006; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 1007; SKIP-CACHE-INV-NEXT: s_endpgm 1008 i32 addrspace(3)* %out, i32 %in) { 1009entry: 1010 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in acq_rel 1011 store i32 %val, i32 addrspace(3)* %out, align 4 1012 ret void 1013} 1014 1015define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( 1016; GFX6-LABEL: local_system_seq_cst_ret_atomicrmw: 1017; GFX6: ; %bb.0: ; %entry 1018; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1019; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 1020; GFX6-NEXT: s_mov_b32 m0, -1 1021; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1022; GFX6-NEXT: v_mov_b32_e32 v0, s2 1023; GFX6-NEXT: v_mov_b32_e32 v1, s0 1024; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1025; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1026; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1027; GFX6-NEXT: buffer_wbinvl1 1028; GFX6-NEXT: ds_write_b32 v0, v1 1029; GFX6-NEXT: s_endpgm 1030; 1031; GFX7-LABEL: local_system_seq_cst_ret_atomicrmw: 1032; GFX7: ; %bb.0: ; %entry 1033; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1034; GFX7-NEXT: s_mov_b32 m0, -1 1035; GFX7-NEXT: s_waitcnt 
lgkmcnt(0) 1036; GFX7-NEXT: v_mov_b32_e32 v0, s0 1037; GFX7-NEXT: v_mov_b32_e32 v1, s1 1038; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1039; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1040; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1041; GFX7-NEXT: buffer_wbinvl1_vol 1042; GFX7-NEXT: ds_write_b32 v0, v1 1043; GFX7-NEXT: s_endpgm 1044; 1045; GFX10-WGP-LABEL: local_system_seq_cst_ret_atomicrmw: 1046; GFX10-WGP: ; %bb.0: ; %entry 1047; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1048; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1049; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1050; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 1051; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1052; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1053; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1054; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1055; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1056; GFX10-WGP-NEXT: buffer_gl0_inv 1057; GFX10-WGP-NEXT: buffer_gl1_inv 1058; GFX10-WGP-NEXT: ds_write_b32 v0, v1 1059; GFX10-WGP-NEXT: s_endpgm 1060; 1061; GFX10-CU-LABEL: local_system_seq_cst_ret_atomicrmw: 1062; GFX10-CU: ; %bb.0: ; %entry 1063; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 1064; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1065; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1066; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 1067; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1068; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1069; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1070; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1071; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1072; GFX10-CU-NEXT: buffer_gl0_inv 1073; GFX10-CU-NEXT: buffer_gl1_inv 1074; GFX10-CU-NEXT: ds_write_b32 v0, v1 1075; GFX10-CU-NEXT: s_endpgm 1076; 1077; SKIP-CACHE-INV-LABEL: local_system_seq_cst_ret_atomicrmw: 1078; SKIP-CACHE-INV: ; %bb.0: ; %entry 1079; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1080; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1081; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1082; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 1083; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s1 1084; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1085; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 1086; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1087; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 1088; SKIP-CACHE-INV-NEXT: s_endpgm 1089 i32 addrspace(3)* %out, i32 %in) { 1090entry: 1091 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in seq_cst 1092 store i32 %val, i32 addrspace(3)* %out, align 4 1093 ret void 1094} 1095 1096define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg( 1097; GFX6-LABEL: local_system_monotonic_monotonic_cmpxchg: 1098; GFX6: ; %bb.0: ; %entry 1099; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1100; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1101; GFX6-NEXT: s_mov_b32 m0, -1 1102; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1103; GFX6-NEXT: v_mov_b32_e32 v0, s2 1104; GFX6-NEXT: v_mov_b32_e32 v1, s1 1105; GFX6-NEXT: v_mov_b32_e32 v2, s0 1106; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1107; GFX6-NEXT: s_endpgm 1108; 1109; GFX7-LABEL: local_system_monotonic_monotonic_cmpxchg: 1110; GFX7: ; %bb.0: ; %entry 1111; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1112; GFX7-NEXT: s_mov_b32 m0, -1 1113; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1114; GFX7-NEXT: v_mov_b32_e32 v0, s0 1115; GFX7-NEXT: v_mov_b32_e32 v1, s2 1116; GFX7-NEXT: v_mov_b32_e32 v2, s1 1117; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1118; GFX7-NEXT: s_endpgm 1119; 1120; GFX10-WGP-LABEL: local_system_monotonic_monotonic_cmpxchg: 1121; GFX10-WGP: ; %bb.0: ; %entry 1122; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1123; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1124; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1125; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1126; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1127; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1128; GFX10-WGP-NEXT: s_endpgm 1129; 1130; GFX10-CU-LABEL: local_system_monotonic_monotonic_cmpxchg: 1131; GFX10-CU: ; %bb.0: ; %entry 1132; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 
1133; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1134; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1135; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1136; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1137; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1138; GFX10-CU-NEXT: s_endpgm 1139; 1140; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_cmpxchg: 1141; SKIP-CACHE-INV: ; %bb.0: ; %entry 1142; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1143; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1144; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1145; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1146; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1147; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1148; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1149; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1150; SKIP-CACHE-INV-NEXT: s_endpgm 1151 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1152entry: 1153 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1154 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in monotonic monotonic 1155 ret void 1156} 1157 1158define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg( 1159; GFX6-LABEL: local_system_acquire_monotonic_cmpxchg: 1160; GFX6: ; %bb.0: ; %entry 1161; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1162; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1163; GFX6-NEXT: s_mov_b32 m0, -1 1164; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1165; GFX6-NEXT: v_mov_b32_e32 v0, s2 1166; GFX6-NEXT: v_mov_b32_e32 v1, s1 1167; GFX6-NEXT: v_mov_b32_e32 v2, s0 1168; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1169; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1170; GFX6-NEXT: buffer_wbinvl1 1171; GFX6-NEXT: s_endpgm 1172; 1173; GFX7-LABEL: local_system_acquire_monotonic_cmpxchg: 1174; GFX7: ; %bb.0: ; %entry 1175; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1176; GFX7-NEXT: s_mov_b32 m0, -1 1177; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1178; GFX7-NEXT: v_mov_b32_e32 v0, s0 1179; GFX7-NEXT: v_mov_b32_e32 v1, s2 1180; GFX7-NEXT: v_mov_b32_e32 v2, s1 1181; 
GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1182; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1183; GFX7-NEXT: buffer_wbinvl1_vol 1184; GFX7-NEXT: s_endpgm 1185; 1186; GFX10-WGP-LABEL: local_system_acquire_monotonic_cmpxchg: 1187; GFX10-WGP: ; %bb.0: ; %entry 1188; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1189; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1190; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1191; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1192; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1193; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1194; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1195; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1196; GFX10-WGP-NEXT: buffer_gl0_inv 1197; GFX10-WGP-NEXT: buffer_gl1_inv 1198; GFX10-WGP-NEXT: s_endpgm 1199; 1200; GFX10-CU-LABEL: local_system_acquire_monotonic_cmpxchg: 1201; GFX10-CU: ; %bb.0: ; %entry 1202; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1203; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1204; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1205; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1206; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1207; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1208; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1209; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1210; GFX10-CU-NEXT: buffer_gl0_inv 1211; GFX10-CU-NEXT: buffer_gl1_inv 1212; GFX10-CU-NEXT: s_endpgm 1213; 1214; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_cmpxchg: 1215; SKIP-CACHE-INV: ; %bb.0: ; %entry 1216; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1217; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1218; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1219; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1220; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1221; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1222; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1223; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1224; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1225; SKIP-CACHE-INV-NEXT: s_endpgm 1226 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1227entry: 1228 
%gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1229 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire monotonic 1230 ret void 1231} 1232 1233define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( 1234; GFX6-LABEL: local_system_release_monotonic_cmpxchg: 1235; GFX6: ; %bb.0: ; %entry 1236; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1237; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1238; GFX6-NEXT: s_mov_b32 m0, -1 1239; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1240; GFX6-NEXT: v_mov_b32_e32 v0, s2 1241; GFX6-NEXT: v_mov_b32_e32 v1, s1 1242; GFX6-NEXT: v_mov_b32_e32 v2, s0 1243; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1244; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1245; GFX6-NEXT: s_endpgm 1246; 1247; GFX7-LABEL: local_system_release_monotonic_cmpxchg: 1248; GFX7: ; %bb.0: ; %entry 1249; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1250; GFX7-NEXT: s_mov_b32 m0, -1 1251; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1252; GFX7-NEXT: v_mov_b32_e32 v0, s0 1253; GFX7-NEXT: v_mov_b32_e32 v1, s2 1254; GFX7-NEXT: v_mov_b32_e32 v2, s1 1255; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1256; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1257; GFX7-NEXT: s_endpgm 1258; 1259; GFX10-WGP-LABEL: local_system_release_monotonic_cmpxchg: 1260; GFX10-WGP: ; %bb.0: ; %entry 1261; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1262; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1263; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1264; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1265; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1266; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1267; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1268; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1269; GFX10-WGP-NEXT: s_endpgm 1270; 1271; GFX10-CU-LABEL: local_system_release_monotonic_cmpxchg: 1272; GFX10-CU: ; %bb.0: ; %entry 1273; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1274; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1275; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1276; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 
1277; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1278; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1279; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1280; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1281; GFX10-CU-NEXT: s_endpgm 1282; 1283; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_cmpxchg: 1284; SKIP-CACHE-INV: ; %bb.0: ; %entry 1285; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1286; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1287; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1288; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1289; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1290; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1291; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1292; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1293; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1294; SKIP-CACHE-INV-NEXT: s_endpgm 1295 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1296entry: 1297 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1298 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release monotonic 1299 ret void 1300} 1301 1302define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( 1303; GFX6-LABEL: local_system_acq_rel_monotonic_cmpxchg: 1304; GFX6: ; %bb.0: ; %entry 1305; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1306; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1307; GFX6-NEXT: s_mov_b32 m0, -1 1308; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1309; GFX6-NEXT: v_mov_b32_e32 v0, s2 1310; GFX6-NEXT: v_mov_b32_e32 v1, s1 1311; GFX6-NEXT: v_mov_b32_e32 v2, s0 1312; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1313; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1314; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1315; GFX6-NEXT: buffer_wbinvl1 1316; GFX6-NEXT: s_endpgm 1317; 1318; GFX7-LABEL: local_system_acq_rel_monotonic_cmpxchg: 1319; GFX7: ; %bb.0: ; %entry 1320; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1321; GFX7-NEXT: s_mov_b32 m0, -1 1322; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1323; GFX7-NEXT: v_mov_b32_e32 v0, s0 1324; 
GFX7-NEXT: v_mov_b32_e32 v1, s2 1325; GFX7-NEXT: v_mov_b32_e32 v2, s1 1326; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1327; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1328; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1329; GFX7-NEXT: buffer_wbinvl1_vol 1330; GFX7-NEXT: s_endpgm 1331; 1332; GFX10-WGP-LABEL: local_system_acq_rel_monotonic_cmpxchg: 1333; GFX10-WGP: ; %bb.0: ; %entry 1334; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1335; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1336; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1337; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1338; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1339; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1340; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1341; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1342; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1343; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1344; GFX10-WGP-NEXT: buffer_gl0_inv 1345; GFX10-WGP-NEXT: buffer_gl1_inv 1346; GFX10-WGP-NEXT: s_endpgm 1347; 1348; GFX10-CU-LABEL: local_system_acq_rel_monotonic_cmpxchg: 1349; GFX10-CU: ; %bb.0: ; %entry 1350; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1351; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1352; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1353; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1354; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1355; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1356; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1357; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1358; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1359; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1360; GFX10-CU-NEXT: buffer_gl0_inv 1361; GFX10-CU-NEXT: buffer_gl1_inv 1362; GFX10-CU-NEXT: s_endpgm 1363; 1364; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_cmpxchg: 1365; SKIP-CACHE-INV: ; %bb.0: ; %entry 1366; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1367; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1368; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1369; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1370; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 
v0, s2 1371; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1372; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1373; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1374; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1375; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1376; SKIP-CACHE-INV-NEXT: s_endpgm 1377 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1378entry: 1379 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1380 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel monotonic 1381 ret void 1382} 1383 1384define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( 1385; GFX6-LABEL: local_system_seq_cst_monotonic_cmpxchg: 1386; GFX6: ; %bb.0: ; %entry 1387; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1388; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1389; GFX6-NEXT: s_mov_b32 m0, -1 1390; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1391; GFX6-NEXT: v_mov_b32_e32 v0, s2 1392; GFX6-NEXT: v_mov_b32_e32 v1, s1 1393; GFX6-NEXT: v_mov_b32_e32 v2, s0 1394; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1395; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1396; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1397; GFX6-NEXT: buffer_wbinvl1 1398; GFX6-NEXT: s_endpgm 1399; 1400; GFX7-LABEL: local_system_seq_cst_monotonic_cmpxchg: 1401; GFX7: ; %bb.0: ; %entry 1402; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1403; GFX7-NEXT: s_mov_b32 m0, -1 1404; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1405; GFX7-NEXT: v_mov_b32_e32 v0, s0 1406; GFX7-NEXT: v_mov_b32_e32 v1, s2 1407; GFX7-NEXT: v_mov_b32_e32 v2, s1 1408; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1409; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1410; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1411; GFX7-NEXT: buffer_wbinvl1_vol 1412; GFX7-NEXT: s_endpgm 1413; 1414; GFX10-WGP-LABEL: local_system_seq_cst_monotonic_cmpxchg: 1415; GFX10-WGP: ; %bb.0: ; %entry 1416; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1417; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1418; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1419; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1420; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1421; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1422; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1423; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1424; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1425; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1426; GFX10-WGP-NEXT: buffer_gl0_inv 1427; GFX10-WGP-NEXT: buffer_gl1_inv 1428; GFX10-WGP-NEXT: s_endpgm 1429; 1430; GFX10-CU-LABEL: local_system_seq_cst_monotonic_cmpxchg: 1431; GFX10-CU: ; %bb.0: ; %entry 1432; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1433; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1434; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1435; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1436; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1437; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1438; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1439; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1440; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1441; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1442; GFX10-CU-NEXT: buffer_gl0_inv 1443; GFX10-CU-NEXT: buffer_gl1_inv 1444; GFX10-CU-NEXT: s_endpgm 1445; 1446; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_cmpxchg: 1447; SKIP-CACHE-INV: ; %bb.0: ; %entry 1448; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1449; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1450; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1451; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1452; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1453; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1454; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1455; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1456; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1457; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1458; SKIP-CACHE-INV-NEXT: s_endpgm 1459 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1460entry: 1461 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1462 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst monotonic 1463 
ret void 1464} 1465 1466define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg( 1467; GFX6-LABEL: local_system_acquire_acquire_cmpxchg: 1468; GFX6: ; %bb.0: ; %entry 1469; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1470; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1471; GFX6-NEXT: s_mov_b32 m0, -1 1472; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1473; GFX6-NEXT: v_mov_b32_e32 v0, s2 1474; GFX6-NEXT: v_mov_b32_e32 v1, s1 1475; GFX6-NEXT: v_mov_b32_e32 v2, s0 1476; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1477; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1478; GFX6-NEXT: buffer_wbinvl1 1479; GFX6-NEXT: s_endpgm 1480; 1481; GFX7-LABEL: local_system_acquire_acquire_cmpxchg: 1482; GFX7: ; %bb.0: ; %entry 1483; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1484; GFX7-NEXT: s_mov_b32 m0, -1 1485; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1486; GFX7-NEXT: v_mov_b32_e32 v0, s0 1487; GFX7-NEXT: v_mov_b32_e32 v1, s2 1488; GFX7-NEXT: v_mov_b32_e32 v2, s1 1489; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1490; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1491; GFX7-NEXT: buffer_wbinvl1_vol 1492; GFX7-NEXT: s_endpgm 1493; 1494; GFX10-WGP-LABEL: local_system_acquire_acquire_cmpxchg: 1495; GFX10-WGP: ; %bb.0: ; %entry 1496; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1497; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1498; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1499; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1500; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1501; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1502; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1503; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1504; GFX10-WGP-NEXT: buffer_gl0_inv 1505; GFX10-WGP-NEXT: buffer_gl1_inv 1506; GFX10-WGP-NEXT: s_endpgm 1507; 1508; GFX10-CU-LABEL: local_system_acquire_acquire_cmpxchg: 1509; GFX10-CU: ; %bb.0: ; %entry 1510; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1511; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1512; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1513; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1514; GFX10-CU-NEXT: 
v_mov_b32_e32 v2, s1 1515; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1516; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1517; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1518; GFX10-CU-NEXT: buffer_gl0_inv 1519; GFX10-CU-NEXT: buffer_gl1_inv 1520; GFX10-CU-NEXT: s_endpgm 1521; 1522; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_cmpxchg: 1523; SKIP-CACHE-INV: ; %bb.0: ; %entry 1524; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1525; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1526; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1527; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1528; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1529; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1530; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1531; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1532; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1533; SKIP-CACHE-INV-NEXT: s_endpgm 1534 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1535entry: 1536 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1537 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire acquire 1538 ret void 1539} 1540 1541define amdgpu_kernel void @local_system_release_acquire_cmpxchg( 1542; GFX6-LABEL: local_system_release_acquire_cmpxchg: 1543; GFX6: ; %bb.0: ; %entry 1544; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1545; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1546; GFX6-NEXT: s_mov_b32 m0, -1 1547; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1548; GFX6-NEXT: v_mov_b32_e32 v0, s2 1549; GFX6-NEXT: v_mov_b32_e32 v1, s1 1550; GFX6-NEXT: v_mov_b32_e32 v2, s0 1551; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1552; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1553; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1554; GFX6-NEXT: buffer_wbinvl1 1555; GFX6-NEXT: s_endpgm 1556; 1557; GFX7-LABEL: local_system_release_acquire_cmpxchg: 1558; GFX7: ; %bb.0: ; %entry 1559; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1560; GFX7-NEXT: s_mov_b32 m0, -1 1561; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1562; GFX7-NEXT: 
v_mov_b32_e32 v0, s0 1563; GFX7-NEXT: v_mov_b32_e32 v1, s2 1564; GFX7-NEXT: v_mov_b32_e32 v2, s1 1565; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1566; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1567; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1568; GFX7-NEXT: buffer_wbinvl1_vol 1569; GFX7-NEXT: s_endpgm 1570; 1571; GFX10-WGP-LABEL: local_system_release_acquire_cmpxchg: 1572; GFX10-WGP: ; %bb.0: ; %entry 1573; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1574; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 1576; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1577; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1578; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1579; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1580; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1581; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1582; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1583; GFX10-WGP-NEXT: buffer_gl0_inv 1584; GFX10-WGP-NEXT: buffer_gl1_inv 1585; GFX10-WGP-NEXT: s_endpgm 1586; 1587; GFX10-CU-LABEL: local_system_release_acquire_cmpxchg: 1588; GFX10-CU: ; %bb.0: ; %entry 1589; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1590; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1591; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1592; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1593; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1594; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1595; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1596; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1597; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1598; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1599; GFX10-CU-NEXT: buffer_gl0_inv 1600; GFX10-CU-NEXT: buffer_gl1_inv 1601; GFX10-CU-NEXT: s_endpgm 1602; 1603; SKIP-CACHE-INV-LABEL: local_system_release_acquire_cmpxchg: 1604; SKIP-CACHE-INV: ; %bb.0: ; %entry 1605; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1606; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1607; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1608; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1609; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1610; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1611; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1612; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1613; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1614; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1615; SKIP-CACHE-INV-NEXT: s_endpgm 1616 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1617entry: 1618 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1619 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release acquire 1620 ret void 1621} 1622 1623define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( 1624; GFX6-LABEL: local_system_acq_rel_acquire_cmpxchg: 1625; GFX6: ; %bb.0: ; %entry 1626; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 1627; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 1628; GFX6-NEXT: s_mov_b32 m0, -1 1629; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1630; GFX6-NEXT: v_mov_b32_e32 v0, s2 1631; GFX6-NEXT: v_mov_b32_e32 v1, s1 1632; GFX6-NEXT: v_mov_b32_e32 v2, s0 1633; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1634; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1635; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1636; GFX6-NEXT: buffer_wbinvl1 1637; GFX6-NEXT: s_endpgm 1638; 1639; GFX7-LABEL: local_system_acq_rel_acquire_cmpxchg: 1640; GFX7: ; %bb.0: ; %entry 1641; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1642; GFX7-NEXT: s_mov_b32 m0, -1 1643; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1644; GFX7-NEXT: v_mov_b32_e32 v0, s0 1645; GFX7-NEXT: v_mov_b32_e32 v1, s2 1646; GFX7-NEXT: v_mov_b32_e32 v2, s1 1647; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1648; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1649; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1650; GFX7-NEXT: buffer_wbinvl1_vol 1651; GFX7-NEXT: s_endpgm 1652; 1653; GFX10-WGP-LABEL: local_system_acq_rel_acquire_cmpxchg: 1654; GFX10-WGP: ; %bb.0: ; %entry 1655; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1656; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1657; GFX10-WGP-NEXT: v_mov_b32_e32 
v0, s0 1658; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 1659; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 1660; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1661; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1662; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1663; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 1664; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 1665; GFX10-WGP-NEXT: buffer_gl0_inv 1666; GFX10-WGP-NEXT: buffer_gl1_inv 1667; GFX10-WGP-NEXT: s_endpgm 1668; 1669; GFX10-CU-LABEL: local_system_acq_rel_acquire_cmpxchg: 1670; GFX10-CU: ; %bb.0: ; %entry 1671; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1672; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1673; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 1674; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 1675; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 1676; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1677; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1678; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1679; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 1680; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 1681; GFX10-CU-NEXT: buffer_gl0_inv 1682; GFX10-CU-NEXT: buffer_gl1_inv 1683; GFX10-CU-NEXT: s_endpgm 1684; 1685; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_cmpxchg: 1686; SKIP-CACHE-INV: ; %bb.0: ; %entry 1687; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 1688; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 1689; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 1690; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 1691; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 1692; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 1693; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 1694; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1695; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 1696; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1697; SKIP-CACHE-INV-NEXT: s_endpgm 1698 i32 addrspace(3)* %out, i32 %in, i32 %old) { 1699entry: 1700 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 1701 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel acquire 
ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, seq_cst on success and acquire on failure, with no syncscope given.
; The result is unused; the checks below expect the non-returning ds_cmpst_b32
; at offset 16.
define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
; GFX6-LABEL: local_system_seq_cst_acquire_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_acquire_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_acquire_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_acquire_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst acquire
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, seq_cst on both success and failure, with no syncscope given.
; The result is unused; the checks below expect the non-returning ds_cmpst_b32
; at offset 16.
define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
; GFX6-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst seq_cst
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, acquire on success and monotonic on failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg(
; GFX6-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, acq_rel on success and monotonic on failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
; GFX6-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, seq_cst on success and monotonic on failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
; GFX6-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst monotonic
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, acquire on both success and failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg(
; GFX6-LABEL: local_system_acquire_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_acquire_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_acquire_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_acquire_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acquire acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, release on success and acquire on failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
; GFX6-LABEL: local_system_release_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_release_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_release_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_release_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_release_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in release acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, acq_rel on success and acquire on failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
; GFX6-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in acq_rel acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, seq_cst on success and acquire on failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
; GFX6-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst acquire
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

; Compare-and-swap on the i32 at index 4 of the local (addrspace(3)) pointer
; %out, seq_cst on both success and failure, with no syncscope given.
; Field 0 of the result is stored to %out; the checks below expect
; ds_cmpst_rtn_b32 at offset 16 followed by ds_write_b32.
define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX6-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: ds_write_b32 v0, v1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_mov_b32_e32 v1, s2
; GFX7-NEXT: v_mov_b32_e32 v2, s1
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: ds_write_b32 v0, v1
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
; SKIP-CACHE-INV-NEXT: s_endpgm
    i32 addrspace(3)* %out, i32 %in, i32 %old) {
entry:
  %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4
  %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(3)* %out, align 4
  ret void
}

define amdgpu_kernel void @local_system_one_as_unordered_load(
; GFX6-LABEL: local_system_one_as_unordered_load:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, s0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: ds_write_b32 v1, v0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: local_system_one_as_unordered_load:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: ds_read_b32 v0, v0
; GFX7-NEXT: v_mov_b32_e32 v1, s1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: ds_write_b32 v1, v0
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: local_system_one_as_unordered_load:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_read_b32 v0, v0
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: ds_write_b32 v1, v0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: local_system_one_as_unordered_load:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX10-CU-NEXT: ds_read_b32 v0, v0
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: ds_write_b32 v1, v0
;
GFX10-CU-NEXT: s_endpgm 2598; 2599; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_load: 2600; SKIP-CACHE-INV: ; %bb.0: ; %entry 2601; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2602; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2603; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2604; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2605; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 2606; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2607; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2608; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 2609; SKIP-CACHE-INV-NEXT: s_endpgm 2610 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 2611entry: 2612 %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") unordered, align 4 2613 store i32 %val, i32 addrspace(3)* %out 2614 ret void 2615} 2616 2617define amdgpu_kernel void @local_system_one_as_monotonic_load( 2618; GFX6-LABEL: local_system_one_as_monotonic_load: 2619; GFX6: ; %bb.0: ; %entry 2620; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2621; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2622; GFX6-NEXT: s_mov_b32 m0, -1 2623; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2624; GFX6-NEXT: v_mov_b32_e32 v0, s2 2625; GFX6-NEXT: ds_read_b32 v0, v0 2626; GFX6-NEXT: v_mov_b32_e32 v1, s0 2627; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2628; GFX6-NEXT: ds_write_b32 v1, v0 2629; GFX6-NEXT: s_endpgm 2630; 2631; GFX7-LABEL: local_system_one_as_monotonic_load: 2632; GFX7: ; %bb.0: ; %entry 2633; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2634; GFX7-NEXT: s_mov_b32 m0, -1 2635; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2636; GFX7-NEXT: v_mov_b32_e32 v0, s0 2637; GFX7-NEXT: ds_read_b32 v0, v0 2638; GFX7-NEXT: v_mov_b32_e32 v1, s1 2639; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2640; GFX7-NEXT: ds_write_b32 v1, v0 2641; GFX7-NEXT: s_endpgm 2642; 2643; GFX10-WGP-LABEL: local_system_one_as_monotonic_load: 2644; GFX10-WGP: ; %bb.0: ; %entry 2645; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2646; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2647; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2648; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2649; GFX10-WGP-NEXT: ds_read_b32 v0, v0 2650; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2651; GFX10-WGP-NEXT: ds_write_b32 v1, v0 2652; GFX10-WGP-NEXT: s_endpgm 2653; 2654; GFX10-CU-LABEL: local_system_one_as_monotonic_load: 2655; GFX10-CU: ; %bb.0: ; %entry 2656; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2657; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2658; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2659; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2660; GFX10-CU-NEXT: ds_read_b32 v0, v0 2661; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2662; GFX10-CU-NEXT: ds_write_b32 v1, v0 2663; GFX10-CU-NEXT: s_endpgm 2664; 2665; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_load: 2666; SKIP-CACHE-INV: ; %bb.0: ; %entry 2667; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2668; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2669; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2670; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2671; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 2672; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2673; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2674; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 2675; SKIP-CACHE-INV-NEXT: s_endpgm 2676 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 2677entry: 2678 %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") monotonic, align 4 2679 store i32 %val, i32 addrspace(3)* %out 2680 ret void 2681} 2682 2683define amdgpu_kernel void @local_system_one_as_acquire_load( 2684; GFX6-LABEL: local_system_one_as_acquire_load: 2685; GFX6: ; %bb.0: ; %entry 2686; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2687; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2688; GFX6-NEXT: s_mov_b32 m0, -1 2689; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2690; GFX6-NEXT: v_mov_b32_e32 v0, s2 2691; GFX6-NEXT: ds_read_b32 v0, v0 2692; GFX6-NEXT: v_mov_b32_e32 v1, s0 2693; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2694; GFX6-NEXT: ds_write_b32 v1, v0 2695; GFX6-NEXT: s_endpgm 2696; 2697; GFX7-LABEL: local_system_one_as_acquire_load: 2698; GFX7: ; %bb.0: ; %entry 
2699; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2700; GFX7-NEXT: s_mov_b32 m0, -1 2701; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2702; GFX7-NEXT: v_mov_b32_e32 v0, s0 2703; GFX7-NEXT: ds_read_b32 v0, v0 2704; GFX7-NEXT: v_mov_b32_e32 v1, s1 2705; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2706; GFX7-NEXT: ds_write_b32 v1, v0 2707; GFX7-NEXT: s_endpgm 2708; 2709; GFX10-WGP-LABEL: local_system_one_as_acquire_load: 2710; GFX10-WGP: ; %bb.0: ; %entry 2711; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2712; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2713; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2714; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2715; GFX10-WGP-NEXT: ds_read_b32 v0, v0 2716; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2717; GFX10-WGP-NEXT: ds_write_b32 v1, v0 2718; GFX10-WGP-NEXT: s_endpgm 2719; 2720; GFX10-CU-LABEL: local_system_one_as_acquire_load: 2721; GFX10-CU: ; %bb.0: ; %entry 2722; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2723; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2724; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2725; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2726; GFX10-CU-NEXT: ds_read_b32 v0, v0 2727; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2728; GFX10-CU-NEXT: ds_write_b32 v1, v0 2729; GFX10-CU-NEXT: s_endpgm 2730; 2731; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_load: 2732; SKIP-CACHE-INV: ; %bb.0: ; %entry 2733; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2734; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2735; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2736; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2737; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 2738; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2739; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2740; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 2741; SKIP-CACHE-INV-NEXT: s_endpgm 2742 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 2743entry: 2744 %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") acquire, align 4 2745 store i32 %val, i32 addrspace(3)* %out 2746 ret void 2747} 2748 2749define amdgpu_kernel void 
@local_system_one_as_seq_cst_load( 2750; GFX6-LABEL: local_system_one_as_seq_cst_load: 2751; GFX6: ; %bb.0: ; %entry 2752; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2753; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2754; GFX6-NEXT: s_mov_b32 m0, -1 2755; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2756; GFX6-NEXT: v_mov_b32_e32 v0, s2 2757; GFX6-NEXT: ds_read_b32 v0, v0 2758; GFX6-NEXT: v_mov_b32_e32 v1, s0 2759; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2760; GFX6-NEXT: ds_write_b32 v1, v0 2761; GFX6-NEXT: s_endpgm 2762; 2763; GFX7-LABEL: local_system_one_as_seq_cst_load: 2764; GFX7: ; %bb.0: ; %entry 2765; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2766; GFX7-NEXT: s_mov_b32 m0, -1 2767; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2768; GFX7-NEXT: v_mov_b32_e32 v0, s0 2769; GFX7-NEXT: ds_read_b32 v0, v0 2770; GFX7-NEXT: v_mov_b32_e32 v1, s1 2771; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2772; GFX7-NEXT: ds_write_b32 v1, v0 2773; GFX7-NEXT: s_endpgm 2774; 2775; GFX10-WGP-LABEL: local_system_one_as_seq_cst_load: 2776; GFX10-WGP: ; %bb.0: ; %entry 2777; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2778; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2779; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 2780; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 2781; GFX10-WGP-NEXT: ds_read_b32 v0, v0 2782; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2783; GFX10-WGP-NEXT: ds_write_b32 v1, v0 2784; GFX10-WGP-NEXT: s_endpgm 2785; 2786; GFX10-CU-LABEL: local_system_one_as_seq_cst_load: 2787; GFX10-CU: ; %bb.0: ; %entry 2788; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2789; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2790; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 2791; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 2792; GFX10-CU-NEXT: ds_read_b32 v0, v0 2793; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2794; GFX10-CU-NEXT: ds_write_b32 v1, v0 2795; GFX10-CU-NEXT: s_endpgm 2796; 2797; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_load: 2798; SKIP-CACHE-INV: ; %bb.0: ; %entry 2799; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2800; SKIP-CACHE-INV-NEXT: s_mov_b32 
m0, -1 2801; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2802; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 2803; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 2804; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 2805; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2806; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0 2807; SKIP-CACHE-INV-NEXT: s_endpgm 2808 i32 addrspace(3)* %in, i32 addrspace(3)* %out) { 2809entry: 2810 %val = load atomic i32, i32 addrspace(3)* %in syncscope("one-as") seq_cst, align 4 2811 store i32 %val, i32 addrspace(3)* %out 2812 ret void 2813} 2814 2815define amdgpu_kernel void @local_system_one_as_unordered_store( 2816; GFX6-LABEL: local_system_one_as_unordered_store: 2817; GFX6: ; %bb.0: ; %entry 2818; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2819; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2820; GFX6-NEXT: s_mov_b32 m0, -1 2821; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2822; GFX6-NEXT: v_mov_b32_e32 v1, s2 2823; GFX6-NEXT: v_mov_b32_e32 v0, s0 2824; GFX6-NEXT: ds_write_b32 v0, v1 2825; GFX6-NEXT: s_endpgm 2826; 2827; GFX7-LABEL: local_system_one_as_unordered_store: 2828; GFX7: ; %bb.0: ; %entry 2829; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2830; GFX7-NEXT: s_mov_b32 m0, -1 2831; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2832; GFX7-NEXT: v_mov_b32_e32 v0, s1 2833; GFX7-NEXT: v_mov_b32_e32 v1, s0 2834; GFX7-NEXT: ds_write_b32 v0, v1 2835; GFX7-NEXT: s_endpgm 2836; 2837; GFX10-WGP-LABEL: local_system_one_as_unordered_store: 2838; GFX10-WGP: ; %bb.0: ; %entry 2839; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2840; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2841; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 2842; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 2843; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2844; GFX10-WGP-NEXT: s_endpgm 2845; 2846; GFX10-CU-LABEL: local_system_one_as_unordered_store: 2847; GFX10-CU: ; %bb.0: ; %entry 2848; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2849; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2850; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 2851; GFX10-CU-NEXT: v_mov_b32_e32 v1, 
s0 2852; GFX10-CU-NEXT: ds_write_b32 v0, v1 2853; GFX10-CU-NEXT: s_endpgm 2854; 2855; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_store: 2856; SKIP-CACHE-INV: ; %bb.0: ; %entry 2857; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2858; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2859; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2860; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 2861; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2862; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2863; SKIP-CACHE-INV-NEXT: s_endpgm 2864 i32 %in, i32 addrspace(3)* %out) { 2865entry: 2866 store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") unordered, align 4 2867 ret void 2868} 2869 2870define amdgpu_kernel void @local_system_one_as_monotonic_store( 2871; GFX6-LABEL: local_system_one_as_monotonic_store: 2872; GFX6: ; %bb.0: ; %entry 2873; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2874; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2875; GFX6-NEXT: s_mov_b32 m0, -1 2876; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2877; GFX6-NEXT: v_mov_b32_e32 v1, s2 2878; GFX6-NEXT: v_mov_b32_e32 v0, s0 2879; GFX6-NEXT: ds_write_b32 v0, v1 2880; GFX6-NEXT: s_endpgm 2881; 2882; GFX7-LABEL: local_system_one_as_monotonic_store: 2883; GFX7: ; %bb.0: ; %entry 2884; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2885; GFX7-NEXT: s_mov_b32 m0, -1 2886; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2887; GFX7-NEXT: v_mov_b32_e32 v0, s1 2888; GFX7-NEXT: v_mov_b32_e32 v1, s0 2889; GFX7-NEXT: ds_write_b32 v0, v1 2890; GFX7-NEXT: s_endpgm 2891; 2892; GFX10-WGP-LABEL: local_system_one_as_monotonic_store: 2893; GFX10-WGP: ; %bb.0: ; %entry 2894; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2895; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2896; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 2897; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 2898; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2899; GFX10-WGP-NEXT: s_endpgm 2900; 2901; GFX10-CU-LABEL: local_system_one_as_monotonic_store: 2902; GFX10-CU: ; %bb.0: ; %entry 2903; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 2904; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2905; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 2906; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 2907; GFX10-CU-NEXT: ds_write_b32 v0, v1 2908; GFX10-CU-NEXT: s_endpgm 2909; 2910; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_store: 2911; SKIP-CACHE-INV: ; %bb.0: ; %entry 2912; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2913; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2914; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2915; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 2916; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2917; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2918; SKIP-CACHE-INV-NEXT: s_endpgm 2919 i32 %in, i32 addrspace(3)* %out) { 2920entry: 2921 store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") monotonic, align 4 2922 ret void 2923} 2924 2925define amdgpu_kernel void @local_system_one_as_release_store( 2926; GFX6-LABEL: local_system_one_as_release_store: 2927; GFX6: ; %bb.0: ; %entry 2928; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2929; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2930; GFX6-NEXT: s_mov_b32 m0, -1 2931; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2932; GFX6-NEXT: v_mov_b32_e32 v1, s2 2933; GFX6-NEXT: v_mov_b32_e32 v0, s0 2934; GFX6-NEXT: ds_write_b32 v0, v1 2935; GFX6-NEXT: s_endpgm 2936; 2937; GFX7-LABEL: local_system_one_as_release_store: 2938; GFX7: ; %bb.0: ; %entry 2939; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2940; GFX7-NEXT: s_mov_b32 m0, -1 2941; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2942; GFX7-NEXT: v_mov_b32_e32 v0, s1 2943; GFX7-NEXT: v_mov_b32_e32 v1, s0 2944; GFX7-NEXT: ds_write_b32 v0, v1 2945; GFX7-NEXT: s_endpgm 2946; 2947; GFX10-WGP-LABEL: local_system_one_as_release_store: 2948; GFX10-WGP: ; %bb.0: ; %entry 2949; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2950; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 2951; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 2952; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 2953; GFX10-WGP-NEXT: ds_write_b32 v0, v1 2954; GFX10-WGP-NEXT: s_endpgm 2955; 2956; 
GFX10-CU-LABEL: local_system_one_as_release_store: 2957; GFX10-CU: ; %bb.0: ; %entry 2958; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2959; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 2960; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 2961; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 2962; GFX10-CU-NEXT: ds_write_b32 v0, v1 2963; GFX10-CU-NEXT: s_endpgm 2964; 2965; SKIP-CACHE-INV-LABEL: local_system_one_as_release_store: 2966; SKIP-CACHE-INV: ; %bb.0: ; %entry 2967; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 2968; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 2969; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 2970; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 2971; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 2972; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 2973; SKIP-CACHE-INV-NEXT: s_endpgm 2974 i32 %in, i32 addrspace(3)* %out) { 2975entry: 2976 store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") release, align 4 2977 ret void 2978} 2979 2980define amdgpu_kernel void @local_system_one_as_seq_cst_store( 2981; GFX6-LABEL: local_system_one_as_seq_cst_store: 2982; GFX6: ; %bb.0: ; %entry 2983; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 2984; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 2985; GFX6-NEXT: s_mov_b32 m0, -1 2986; GFX6-NEXT: s_waitcnt lgkmcnt(0) 2987; GFX6-NEXT: v_mov_b32_e32 v1, s2 2988; GFX6-NEXT: v_mov_b32_e32 v0, s0 2989; GFX6-NEXT: ds_write_b32 v0, v1 2990; GFX6-NEXT: s_endpgm 2991; 2992; GFX7-LABEL: local_system_one_as_seq_cst_store: 2993; GFX7: ; %bb.0: ; %entry 2994; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 2995; GFX7-NEXT: s_mov_b32 m0, -1 2996; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2997; GFX7-NEXT: v_mov_b32_e32 v0, s1 2998; GFX7-NEXT: v_mov_b32_e32 v1, s0 2999; GFX7-NEXT: ds_write_b32 v0, v1 3000; GFX7-NEXT: s_endpgm 3001; 3002; GFX10-WGP-LABEL: local_system_one_as_seq_cst_store: 3003; GFX10-WGP: ; %bb.0: ; %entry 3004; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3005; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3006; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 3007; 
GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 3008; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3009; GFX10-WGP-NEXT: s_endpgm 3010; 3011; GFX10-CU-LABEL: local_system_one_as_seq_cst_store: 3012; GFX10-CU: ; %bb.0: ; %entry 3013; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3014; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3015; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 3016; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 3017; GFX10-CU-NEXT: ds_write_b32 v0, v1 3018; GFX10-CU-NEXT: s_endpgm 3019; 3020; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_store: 3021; SKIP-CACHE-INV: ; %bb.0: ; %entry 3022; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3023; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3024; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3025; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 3026; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3027; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3028; SKIP-CACHE-INV-NEXT: s_endpgm 3029 i32 %in, i32 addrspace(3)* %out) { 3030entry: 3031 store atomic i32 %in, i32 addrspace(3)* %out syncscope("one-as") seq_cst, align 4 3032 ret void 3033} 3034 3035define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw( 3036; GFX6-LABEL: local_system_one_as_monotonic_atomicrmw: 3037; GFX6: ; %bb.0: ; %entry 3038; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3039; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3040; GFX6-NEXT: s_mov_b32 m0, -1 3041; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3042; GFX6-NEXT: v_mov_b32_e32 v0, s2 3043; GFX6-NEXT: v_mov_b32_e32 v1, s0 3044; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3045; GFX6-NEXT: s_endpgm 3046; 3047; GFX7-LABEL: local_system_one_as_monotonic_atomicrmw: 3048; GFX7: ; %bb.0: ; %entry 3049; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3050; GFX7-NEXT: s_mov_b32 m0, -1 3051; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3052; GFX7-NEXT: v_mov_b32_e32 v0, s0 3053; GFX7-NEXT: v_mov_b32_e32 v1, s1 3054; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3055; GFX7-NEXT: s_endpgm 3056; 3057; GFX10-WGP-LABEL: local_system_one_as_monotonic_atomicrmw: 3058; GFX10-WGP: ; 
%bb.0: ; %entry 3059; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3060; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3061; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3062; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3063; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3064; GFX10-WGP-NEXT: s_endpgm 3065; 3066; GFX10-CU-LABEL: local_system_one_as_monotonic_atomicrmw: 3067; GFX10-CU: ; %bb.0: ; %entry 3068; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3069; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3070; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3071; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3072; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3073; GFX10-CU-NEXT: s_endpgm 3074; 3075; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_atomicrmw: 3076; SKIP-CACHE-INV: ; %bb.0: ; %entry 3077; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3078; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3079; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3080; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3081; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3082; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3083; SKIP-CACHE-INV-NEXT: s_endpgm 3084 i32 addrspace(3)* %out, i32 %in) { 3085entry: 3086 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") monotonic 3087 ret void 3088} 3089 3090define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw( 3091; GFX6-LABEL: local_system_one_as_acquire_atomicrmw: 3092; GFX6: ; %bb.0: ; %entry 3093; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3094; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3095; GFX6-NEXT: s_mov_b32 m0, -1 3096; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3097; GFX6-NEXT: v_mov_b32_e32 v0, s2 3098; GFX6-NEXT: v_mov_b32_e32 v1, s0 3099; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3100; GFX6-NEXT: s_endpgm 3101; 3102; GFX7-LABEL: local_system_one_as_acquire_atomicrmw: 3103; GFX7: ; %bb.0: ; %entry 3104; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3105; GFX7-NEXT: s_mov_b32 m0, -1 3106; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3107; GFX7-NEXT: v_mov_b32_e32 
v0, s0 3108; GFX7-NEXT: v_mov_b32_e32 v1, s1 3109; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3110; GFX7-NEXT: s_endpgm 3111; 3112; GFX10-WGP-LABEL: local_system_one_as_acquire_atomicrmw: 3113; GFX10-WGP: ; %bb.0: ; %entry 3114; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3115; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3116; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3117; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3118; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3119; GFX10-WGP-NEXT: s_endpgm 3120; 3121; GFX10-CU-LABEL: local_system_one_as_acquire_atomicrmw: 3122; GFX10-CU: ; %bb.0: ; %entry 3123; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3124; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3125; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3126; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3127; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3128; GFX10-CU-NEXT: s_endpgm 3129; 3130; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_atomicrmw: 3131; SKIP-CACHE-INV: ; %bb.0: ; %entry 3132; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3133; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3134; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3135; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3136; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3137; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3138; SKIP-CACHE-INV-NEXT: s_endpgm 3139 i32 addrspace(3)* %out, i32 %in) { 3140entry: 3141 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acquire 3142 ret void 3143} 3144 3145define amdgpu_kernel void @local_system_one_as_release_atomicrmw( 3146; GFX6-LABEL: local_system_one_as_release_atomicrmw: 3147; GFX6: ; %bb.0: ; %entry 3148; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3149; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3150; GFX6-NEXT: s_mov_b32 m0, -1 3151; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3152; GFX6-NEXT: v_mov_b32_e32 v0, s2 3153; GFX6-NEXT: v_mov_b32_e32 v1, s0 3154; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3155; GFX6-NEXT: s_endpgm 3156; 3157; GFX7-LABEL: 
local_system_one_as_release_atomicrmw: 3158; GFX7: ; %bb.0: ; %entry 3159; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3160; GFX7-NEXT: s_mov_b32 m0, -1 3161; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3162; GFX7-NEXT: v_mov_b32_e32 v0, s0 3163; GFX7-NEXT: v_mov_b32_e32 v1, s1 3164; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3165; GFX7-NEXT: s_endpgm 3166; 3167; GFX10-WGP-LABEL: local_system_one_as_release_atomicrmw: 3168; GFX10-WGP: ; %bb.0: ; %entry 3169; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3170; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3171; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3172; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3173; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3174; GFX10-WGP-NEXT: s_endpgm 3175; 3176; GFX10-CU-LABEL: local_system_one_as_release_atomicrmw: 3177; GFX10-CU: ; %bb.0: ; %entry 3178; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3179; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3180; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3181; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3182; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3183; GFX10-CU-NEXT: s_endpgm 3184; 3185; SKIP-CACHE-INV-LABEL: local_system_one_as_release_atomicrmw: 3186; SKIP-CACHE-INV: ; %bb.0: ; %entry 3187; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3188; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3189; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3190; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3191; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3192; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3193; SKIP-CACHE-INV-NEXT: s_endpgm 3194 i32 addrspace(3)* %out, i32 %in) { 3195entry: 3196 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") release 3197 ret void 3198} 3199 3200define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw( 3201; GFX6-LABEL: local_system_one_as_acq_rel_atomicrmw: 3202; GFX6: ; %bb.0: ; %entry 3203; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3204; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3205; GFX6-NEXT: s_mov_b32 m0, -1 3206; 
GFX6-NEXT: s_waitcnt lgkmcnt(0) 3207; GFX6-NEXT: v_mov_b32_e32 v0, s2 3208; GFX6-NEXT: v_mov_b32_e32 v1, s0 3209; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3210; GFX6-NEXT: s_endpgm 3211; 3212; GFX7-LABEL: local_system_one_as_acq_rel_atomicrmw: 3213; GFX7: ; %bb.0: ; %entry 3214; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3215; GFX7-NEXT: s_mov_b32 m0, -1 3216; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3217; GFX7-NEXT: v_mov_b32_e32 v0, s0 3218; GFX7-NEXT: v_mov_b32_e32 v1, s1 3219; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3220; GFX7-NEXT: s_endpgm 3221; 3222; GFX10-WGP-LABEL: local_system_one_as_acq_rel_atomicrmw: 3223; GFX10-WGP: ; %bb.0: ; %entry 3224; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3225; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3226; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3227; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3228; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3229; GFX10-WGP-NEXT: s_endpgm 3230; 3231; GFX10-CU-LABEL: local_system_one_as_acq_rel_atomicrmw: 3232; GFX10-CU: ; %bb.0: ; %entry 3233; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3234; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3235; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3236; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3237; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3238; GFX10-CU-NEXT: s_endpgm 3239; 3240; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_atomicrmw: 3241; SKIP-CACHE-INV: ; %bb.0: ; %entry 3242; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3243; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3244; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3245; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3246; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3247; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3248; SKIP-CACHE-INV-NEXT: s_endpgm 3249 i32 addrspace(3)* %out, i32 %in) { 3250entry: 3251 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acq_rel 3252 ret void 3253} 3254 3255define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw( 3256; GFX6-LABEL: 
local_system_one_as_seq_cst_atomicrmw: 3257; GFX6: ; %bb.0: ; %entry 3258; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3259; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3260; GFX6-NEXT: s_mov_b32 m0, -1 3261; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3262; GFX6-NEXT: v_mov_b32_e32 v0, s2 3263; GFX6-NEXT: v_mov_b32_e32 v1, s0 3264; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3265; GFX6-NEXT: s_endpgm 3266; 3267; GFX7-LABEL: local_system_one_as_seq_cst_atomicrmw: 3268; GFX7: ; %bb.0: ; %entry 3269; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3270; GFX7-NEXT: s_mov_b32 m0, -1 3271; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3272; GFX7-NEXT: v_mov_b32_e32 v0, s0 3273; GFX7-NEXT: v_mov_b32_e32 v1, s1 3274; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3275; GFX7-NEXT: s_endpgm 3276; 3277; GFX10-WGP-LABEL: local_system_one_as_seq_cst_atomicrmw: 3278; GFX10-WGP: ; %bb.0: ; %entry 3279; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3280; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3281; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3282; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3283; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3284; GFX10-WGP-NEXT: s_endpgm 3285; 3286; GFX10-CU-LABEL: local_system_one_as_seq_cst_atomicrmw: 3287; GFX10-CU: ; %bb.0: ; %entry 3288; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3289; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3290; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3291; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3292; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3293; GFX10-CU-NEXT: s_endpgm 3294; 3295; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_atomicrmw: 3296; SKIP-CACHE-INV: ; %bb.0: ; %entry 3297; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3298; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3299; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3300; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3301; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3302; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 3303; SKIP-CACHE-INV-NEXT: s_endpgm 3304 i32 addrspace(3)* %out, i32 %in) { 3305entry: 3306 %val = 
atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") seq_cst 3307 ret void 3308} 3309 3310define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw( 3311; GFX6-LABEL: local_system_one_as_acquire_ret_atomicrmw: 3312; GFX6: ; %bb.0: ; %entry 3313; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3314; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3315; GFX6-NEXT: s_mov_b32 m0, -1 3316; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3317; GFX6-NEXT: v_mov_b32_e32 v0, s2 3318; GFX6-NEXT: v_mov_b32_e32 v1, s0 3319; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3320; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3321; GFX6-NEXT: ds_write_b32 v0, v1 3322; GFX6-NEXT: s_endpgm 3323; 3324; GFX7-LABEL: local_system_one_as_acquire_ret_atomicrmw: 3325; GFX7: ; %bb.0: ; %entry 3326; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3327; GFX7-NEXT: s_mov_b32 m0, -1 3328; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3329; GFX7-NEXT: v_mov_b32_e32 v0, s0 3330; GFX7-NEXT: v_mov_b32_e32 v1, s1 3331; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3332; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3333; GFX7-NEXT: ds_write_b32 v0, v1 3334; GFX7-NEXT: s_endpgm 3335; 3336; GFX10-WGP-LABEL: local_system_one_as_acquire_ret_atomicrmw: 3337; GFX10-WGP: ; %bb.0: ; %entry 3338; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3339; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3340; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3341; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3342; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3343; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3344; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3345; GFX10-WGP-NEXT: s_endpgm 3346; 3347; GFX10-CU-LABEL: local_system_one_as_acquire_ret_atomicrmw: 3348; GFX10-CU: ; %bb.0: ; %entry 3349; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3350; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3351; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3352; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3353; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3354; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3355; GFX10-CU-NEXT: ds_write_b32 v0, v1 3356; 
GFX10-CU-NEXT: s_endpgm 3357; 3358; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_ret_atomicrmw: 3359; SKIP-CACHE-INV: ; %bb.0: ; %entry 3360; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3361; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3362; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3363; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3364; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3365; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3366; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3367; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3368; SKIP-CACHE-INV-NEXT: s_endpgm 3369 i32 addrspace(3)* %out, i32 %in) { 3370entry: 3371 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acquire 3372 store i32 %val, i32 addrspace(3)* %out, align 4 3373 ret void 3374} 3375 3376define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw( 3377; GFX6-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: 3378; GFX6: ; %bb.0: ; %entry 3379; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3380; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3381; GFX6-NEXT: s_mov_b32 m0, -1 3382; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3383; GFX6-NEXT: v_mov_b32_e32 v0, s2 3384; GFX6-NEXT: v_mov_b32_e32 v1, s0 3385; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3386; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3387; GFX6-NEXT: ds_write_b32 v0, v1 3388; GFX6-NEXT: s_endpgm 3389; 3390; GFX7-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: 3391; GFX7: ; %bb.0: ; %entry 3392; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3393; GFX7-NEXT: s_mov_b32 m0, -1 3394; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3395; GFX7-NEXT: v_mov_b32_e32 v0, s0 3396; GFX7-NEXT: v_mov_b32_e32 v1, s1 3397; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3398; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3399; GFX7-NEXT: ds_write_b32 v0, v1 3400; GFX7-NEXT: s_endpgm 3401; 3402; GFX10-WGP-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: 3403; GFX10-WGP: ; %bb.0: ; %entry 3404; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3405; GFX10-WGP-NEXT: s_waitcnt 
lgkmcnt(0) 3406; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3407; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3408; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3409; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3410; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3411; GFX10-WGP-NEXT: s_endpgm 3412; 3413; GFX10-CU-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: 3414; GFX10-CU: ; %bb.0: ; %entry 3415; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3416; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3417; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3418; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3419; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3420; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3421; GFX10-CU-NEXT: ds_write_b32 v0, v1 3422; GFX10-CU-NEXT: s_endpgm 3423; 3424; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: 3425; SKIP-CACHE-INV: ; %bb.0: ; %entry 3426; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3427; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3428; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3429; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3430; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3431; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3432; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3433; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3434; SKIP-CACHE-INV-NEXT: s_endpgm 3435 i32 addrspace(3)* %out, i32 %in) { 3436entry: 3437 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") acq_rel 3438 store i32 %val, i32 addrspace(3)* %out, align 4 3439 ret void 3440} 3441 3442define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw( 3443; GFX6-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: 3444; GFX6: ; %bb.0: ; %entry 3445; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3446; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa 3447; GFX6-NEXT: s_mov_b32 m0, -1 3448; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3449; GFX6-NEXT: v_mov_b32_e32 v0, s2 3450; GFX6-NEXT: v_mov_b32_e32 v1, s0 3451; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3452; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3453; GFX6-NEXT: 
ds_write_b32 v0, v1 3454; GFX6-NEXT: s_endpgm 3455; 3456; GFX7-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: 3457; GFX7: ; %bb.0: ; %entry 3458; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3459; GFX7-NEXT: s_mov_b32 m0, -1 3460; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3461; GFX7-NEXT: v_mov_b32_e32 v0, s0 3462; GFX7-NEXT: v_mov_b32_e32 v1, s1 3463; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3464; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3465; GFX7-NEXT: ds_write_b32 v0, v1 3466; GFX7-NEXT: s_endpgm 3467; 3468; GFX10-WGP-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: 3469; GFX10-WGP: ; %bb.0: ; %entry 3470; GFX10-WGP-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3471; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3472; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3473; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 3474; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3475; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3476; GFX10-WGP-NEXT: ds_write_b32 v0, v1 3477; GFX10-WGP-NEXT: s_endpgm 3478; 3479; GFX10-CU-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: 3480; GFX10-CU: ; %bb.0: ; %entry 3481; GFX10-CU-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 3482; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3483; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3484; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 3485; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3486; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3487; GFX10-CU-NEXT: ds_write_b32 v0, v1 3488; GFX10-CU-NEXT: s_endpgm 3489; 3490; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: 3491; SKIP-CACHE-INV: ; %bb.0: ; %entry 3492; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 3493; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3494; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3495; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 3496; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 3497; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 3498; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3499; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 3500; SKIP-CACHE-INV-NEXT: s_endpgm 3501 i32 addrspace(3)* %out, i32 %in) { 
3502entry: 3503 %val = atomicrmw volatile xchg i32 addrspace(3)* %out, i32 %in syncscope("one-as") seq_cst 3504 store i32 %val, i32 addrspace(3)* %out, align 4 3505 ret void 3506} 3507 3508define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg( 3509; GFX6-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: 3510; GFX6: ; %bb.0: ; %entry 3511; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3512; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3513; GFX6-NEXT: s_mov_b32 m0, -1 3514; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3515; GFX6-NEXT: v_mov_b32_e32 v0, s2 3516; GFX6-NEXT: v_mov_b32_e32 v1, s1 3517; GFX6-NEXT: v_mov_b32_e32 v2, s0 3518; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3519; GFX6-NEXT: s_endpgm 3520; 3521; GFX7-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: 3522; GFX7: ; %bb.0: ; %entry 3523; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3524; GFX7-NEXT: s_mov_b32 m0, -1 3525; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3526; GFX7-NEXT: v_mov_b32_e32 v0, s0 3527; GFX7-NEXT: v_mov_b32_e32 v1, s2 3528; GFX7-NEXT: v_mov_b32_e32 v2, s1 3529; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3530; GFX7-NEXT: s_endpgm 3531; 3532; GFX10-WGP-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: 3533; GFX10-WGP: ; %bb.0: ; %entry 3534; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3535; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3536; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3537; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3538; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3539; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3540; GFX10-WGP-NEXT: s_endpgm 3541; 3542; GFX10-CU-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: 3543; GFX10-CU: ; %bb.0: ; %entry 3544; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3545; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3546; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3547; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3548; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3549; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3550; GFX10-CU-NEXT: s_endpgm 3551; 
3552; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: 3553; SKIP-CACHE-INV: ; %bb.0: ; %entry 3554; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3555; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3556; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3557; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3558; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3559; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3560; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3561; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3562; SKIP-CACHE-INV-NEXT: s_endpgm 3563 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3564entry: 3565 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3566 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic 3567 ret void 3568} 3569 3570define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg( 3571; GFX6-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: 3572; GFX6: ; %bb.0: ; %entry 3573; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3574; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3575; GFX6-NEXT: s_mov_b32 m0, -1 3576; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3577; GFX6-NEXT: v_mov_b32_e32 v0, s2 3578; GFX6-NEXT: v_mov_b32_e32 v1, s1 3579; GFX6-NEXT: v_mov_b32_e32 v2, s0 3580; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3581; GFX6-NEXT: s_endpgm 3582; 3583; GFX7-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: 3584; GFX7: ; %bb.0: ; %entry 3585; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3586; GFX7-NEXT: s_mov_b32 m0, -1 3587; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3588; GFX7-NEXT: v_mov_b32_e32 v0, s0 3589; GFX7-NEXT: v_mov_b32_e32 v1, s2 3590; GFX7-NEXT: v_mov_b32_e32 v2, s1 3591; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3592; GFX7-NEXT: s_endpgm 3593; 3594; GFX10-WGP-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: 3595; GFX10-WGP: ; %bb.0: ; %entry 3596; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3597; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3598; 
GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3599; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3600; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3601; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3602; GFX10-WGP-NEXT: s_endpgm 3603; 3604; GFX10-CU-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: 3605; GFX10-CU: ; %bb.0: ; %entry 3606; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3607; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3608; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3609; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3610; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3611; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3612; GFX10-CU-NEXT: s_endpgm 3613; 3614; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: 3615; SKIP-CACHE-INV: ; %bb.0: ; %entry 3616; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3617; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3618; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3619; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3620; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3621; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3622; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3623; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3624; SKIP-CACHE-INV-NEXT: s_endpgm 3625 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3626entry: 3627 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3628 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 3629 ret void 3630} 3631 3632define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg( 3633; GFX6-LABEL: local_system_one_as_release_monotonic_cmpxchg: 3634; GFX6: ; %bb.0: ; %entry 3635; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3636; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3637; GFX6-NEXT: s_mov_b32 m0, -1 3638; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3639; GFX6-NEXT: v_mov_b32_e32 v0, s2 3640; GFX6-NEXT: v_mov_b32_e32 v1, s1 3641; GFX6-NEXT: v_mov_b32_e32 v2, s0 3642; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3643; GFX6-NEXT: s_endpgm 3644; 
3645; GFX7-LABEL: local_system_one_as_release_monotonic_cmpxchg: 3646; GFX7: ; %bb.0: ; %entry 3647; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3648; GFX7-NEXT: s_mov_b32 m0, -1 3649; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3650; GFX7-NEXT: v_mov_b32_e32 v0, s0 3651; GFX7-NEXT: v_mov_b32_e32 v1, s2 3652; GFX7-NEXT: v_mov_b32_e32 v2, s1 3653; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3654; GFX7-NEXT: s_endpgm 3655; 3656; GFX10-WGP-LABEL: local_system_one_as_release_monotonic_cmpxchg: 3657; GFX10-WGP: ; %bb.0: ; %entry 3658; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3659; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3660; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3661; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3662; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3663; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3664; GFX10-WGP-NEXT: s_endpgm 3665; 3666; GFX10-CU-LABEL: local_system_one_as_release_monotonic_cmpxchg: 3667; GFX10-CU: ; %bb.0: ; %entry 3668; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3669; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3670; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3671; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3672; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3673; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3674; GFX10-CU-NEXT: s_endpgm 3675; 3676; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_cmpxchg: 3677; SKIP-CACHE-INV: ; %bb.0: ; %entry 3678; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3679; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3680; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3681; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3682; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3683; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3684; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3685; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3686; SKIP-CACHE-INV-NEXT: s_endpgm 3687 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3688entry: 3689 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3690 %val = cmpxchg volatile i32 addrspace(3)* 
%gep, i32 %old, i32 %in syncscope("one-as") release monotonic 3691 ret void 3692} 3693 3694define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg( 3695; GFX6-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: 3696; GFX6: ; %bb.0: ; %entry 3697; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3698; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3699; GFX6-NEXT: s_mov_b32 m0, -1 3700; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3701; GFX6-NEXT: v_mov_b32_e32 v0, s2 3702; GFX6-NEXT: v_mov_b32_e32 v1, s1 3703; GFX6-NEXT: v_mov_b32_e32 v2, s0 3704; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3705; GFX6-NEXT: s_endpgm 3706; 3707; GFX7-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: 3708; GFX7: ; %bb.0: ; %entry 3709; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3710; GFX7-NEXT: s_mov_b32 m0, -1 3711; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3712; GFX7-NEXT: v_mov_b32_e32 v0, s0 3713; GFX7-NEXT: v_mov_b32_e32 v1, s2 3714; GFX7-NEXT: v_mov_b32_e32 v2, s1 3715; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3716; GFX7-NEXT: s_endpgm 3717; 3718; GFX10-WGP-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: 3719; GFX10-WGP: ; %bb.0: ; %entry 3720; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3721; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3722; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3723; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3724; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3725; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3726; GFX10-WGP-NEXT: s_endpgm 3727; 3728; GFX10-CU-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: 3729; GFX10-CU: ; %bb.0: ; %entry 3730; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3731; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3732; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3733; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3734; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3735; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3736; GFX10-CU-NEXT: s_endpgm 3737; 3738; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: 3739; SKIP-CACHE-INV: ; %bb.0: ; 
%entry 3740; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3741; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3742; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3743; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3744; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3745; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3746; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3747; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3748; SKIP-CACHE-INV-NEXT: s_endpgm 3749 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3750entry: 3751 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3752 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 3753 ret void 3754} 3755 3756define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg( 3757; GFX6-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: 3758; GFX6: ; %bb.0: ; %entry 3759; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3760; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3761; GFX6-NEXT: s_mov_b32 m0, -1 3762; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3763; GFX6-NEXT: v_mov_b32_e32 v0, s2 3764; GFX6-NEXT: v_mov_b32_e32 v1, s1 3765; GFX6-NEXT: v_mov_b32_e32 v2, s0 3766; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3767; GFX6-NEXT: s_endpgm 3768; 3769; GFX7-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: 3770; GFX7: ; %bb.0: ; %entry 3771; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3772; GFX7-NEXT: s_mov_b32 m0, -1 3773; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3774; GFX7-NEXT: v_mov_b32_e32 v0, s0 3775; GFX7-NEXT: v_mov_b32_e32 v1, s2 3776; GFX7-NEXT: v_mov_b32_e32 v2, s1 3777; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3778; GFX7-NEXT: s_endpgm 3779; 3780; GFX10-WGP-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: 3781; GFX10-WGP: ; %bb.0: ; %entry 3782; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3783; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3784; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3785; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3786; GFX10-WGP-NEXT: v_mov_b32_e32 
v2, s1 3787; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3788; GFX10-WGP-NEXT: s_endpgm 3789; 3790; GFX10-CU-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: 3791; GFX10-CU: ; %bb.0: ; %entry 3792; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3793; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3794; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3795; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3796; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3797; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3798; GFX10-CU-NEXT: s_endpgm 3799; 3800; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: 3801; SKIP-CACHE-INV: ; %bb.0: ; %entry 3802; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3803; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3804; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3805; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3806; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3807; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3808; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3809; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3810; SKIP-CACHE-INV-NEXT: s_endpgm 3811 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3812entry: 3813 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3814 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 3815 ret void 3816} 3817 3818define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg( 3819; GFX6-LABEL: local_system_one_as_acquire_acquire_cmpxchg: 3820; GFX6: ; %bb.0: ; %entry 3821; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3822; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3823; GFX6-NEXT: s_mov_b32 m0, -1 3824; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3825; GFX6-NEXT: v_mov_b32_e32 v0, s2 3826; GFX6-NEXT: v_mov_b32_e32 v1, s1 3827; GFX6-NEXT: v_mov_b32_e32 v2, s0 3828; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3829; GFX6-NEXT: s_endpgm 3830; 3831; GFX7-LABEL: local_system_one_as_acquire_acquire_cmpxchg: 3832; GFX7: ; %bb.0: ; %entry 3833; GFX7-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 3834; GFX7-NEXT: s_mov_b32 m0, -1 3835; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3836; GFX7-NEXT: v_mov_b32_e32 v0, s0 3837; GFX7-NEXT: v_mov_b32_e32 v1, s2 3838; GFX7-NEXT: v_mov_b32_e32 v2, s1 3839; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3840; GFX7-NEXT: s_endpgm 3841; 3842; GFX10-WGP-LABEL: local_system_one_as_acquire_acquire_cmpxchg: 3843; GFX10-WGP: ; %bb.0: ; %entry 3844; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3845; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3846; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3847; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3848; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3849; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3850; GFX10-WGP-NEXT: s_endpgm 3851; 3852; GFX10-CU-LABEL: local_system_one_as_acquire_acquire_cmpxchg: 3853; GFX10-CU: ; %bb.0: ; %entry 3854; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3855; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3856; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3857; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3858; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3859; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3860; GFX10-CU-NEXT: s_endpgm 3861; 3862; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_cmpxchg: 3863; SKIP-CACHE-INV: ; %bb.0: ; %entry 3864; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3865; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3866; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3867; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3868; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3869; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3870; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3871; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3872; SKIP-CACHE-INV-NEXT: s_endpgm 3873 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3874entry: 3875 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3876 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 3877 ret void 3878} 3879 3880define amdgpu_kernel void 
@local_system_one_as_release_acquire_cmpxchg( 3881; GFX6-LABEL: local_system_one_as_release_acquire_cmpxchg: 3882; GFX6: ; %bb.0: ; %entry 3883; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3884; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3885; GFX6-NEXT: s_mov_b32 m0, -1 3886; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3887; GFX6-NEXT: v_mov_b32_e32 v0, s2 3888; GFX6-NEXT: v_mov_b32_e32 v1, s1 3889; GFX6-NEXT: v_mov_b32_e32 v2, s0 3890; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3891; GFX6-NEXT: s_endpgm 3892; 3893; GFX7-LABEL: local_system_one_as_release_acquire_cmpxchg: 3894; GFX7: ; %bb.0: ; %entry 3895; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3896; GFX7-NEXT: s_mov_b32 m0, -1 3897; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3898; GFX7-NEXT: v_mov_b32_e32 v0, s0 3899; GFX7-NEXT: v_mov_b32_e32 v1, s2 3900; GFX7-NEXT: v_mov_b32_e32 v2, s1 3901; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3902; GFX7-NEXT: s_endpgm 3903; 3904; GFX10-WGP-LABEL: local_system_one_as_release_acquire_cmpxchg: 3905; GFX10-WGP: ; %bb.0: ; %entry 3906; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3907; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3908; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3909; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3910; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3911; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3912; GFX10-WGP-NEXT: s_endpgm 3913; 3914; GFX10-CU-LABEL: local_system_one_as_release_acquire_cmpxchg: 3915; GFX10-CU: ; %bb.0: ; %entry 3916; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3917; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3918; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3919; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3920; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3921; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3922; GFX10-CU-NEXT: s_endpgm 3923; 3924; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_cmpxchg: 3925; SKIP-CACHE-INV: ; %bb.0: ; %entry 3926; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3927; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3928; 
SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3929; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3930; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3931; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3932; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3933; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3934; SKIP-CACHE-INV-NEXT: s_endpgm 3935 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3936entry: 3937 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 3938 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire 3939 ret void 3940} 3941 3942define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg( 3943; GFX6-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: 3944; GFX6: ; %bb.0: ; %entry 3945; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 3946; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 3947; GFX6-NEXT: s_mov_b32 m0, -1 3948; GFX6-NEXT: s_waitcnt lgkmcnt(0) 3949; GFX6-NEXT: v_mov_b32_e32 v0, s2 3950; GFX6-NEXT: v_mov_b32_e32 v1, s1 3951; GFX6-NEXT: v_mov_b32_e32 v2, s0 3952; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3953; GFX6-NEXT: s_endpgm 3954; 3955; GFX7-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: 3956; GFX7: ; %bb.0: ; %entry 3957; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3958; GFX7-NEXT: s_mov_b32 m0, -1 3959; GFX7-NEXT: s_waitcnt lgkmcnt(0) 3960; GFX7-NEXT: v_mov_b32_e32 v0, s0 3961; GFX7-NEXT: v_mov_b32_e32 v1, s2 3962; GFX7-NEXT: v_mov_b32_e32 v2, s1 3963; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3964; GFX7-NEXT: s_endpgm 3965; 3966; GFX10-WGP-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: 3967; GFX10-WGP: ; %bb.0: ; %entry 3968; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3969; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 3970; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 3971; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 3972; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 3973; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3974; GFX10-WGP-NEXT: s_endpgm 3975; 3976; GFX10-CU-LABEL: 
local_system_one_as_acq_rel_acquire_cmpxchg: 3977; GFX10-CU: ; %bb.0: ; %entry 3978; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3979; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 3980; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 3981; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 3982; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 3983; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3984; GFX10-CU-NEXT: s_endpgm 3985; 3986; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: 3987; SKIP-CACHE-INV: ; %bb.0: ; %entry 3988; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 3989; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 3990; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 3991; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 3992; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 3993; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 3994; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 3995; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 3996; SKIP-CACHE-INV-NEXT: s_endpgm 3997 i32 addrspace(3)* %out, i32 %in, i32 %old) { 3998entry: 3999 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4000 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 4001 ret void 4002} 4003 4004define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg( 4005; GFX6-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: 4006; GFX6: ; %bb.0: ; %entry 4007; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4008; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4009; GFX6-NEXT: s_mov_b32 m0, -1 4010; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4011; GFX6-NEXT: v_mov_b32_e32 v0, s2 4012; GFX6-NEXT: v_mov_b32_e32 v1, s1 4013; GFX6-NEXT: v_mov_b32_e32 v2, s0 4014; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4015; GFX6-NEXT: s_endpgm 4016; 4017; GFX7-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: 4018; GFX7: ; %bb.0: ; %entry 4019; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4020; GFX7-NEXT: s_mov_b32 m0, -1 4021; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4022; GFX7-NEXT: v_mov_b32_e32 v0, 
s0 4023; GFX7-NEXT: v_mov_b32_e32 v1, s2 4024; GFX7-NEXT: v_mov_b32_e32 v2, s1 4025; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4026; GFX7-NEXT: s_endpgm 4027; 4028; GFX10-WGP-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: 4029; GFX10-WGP: ; %bb.0: ; %entry 4030; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4031; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4032; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4033; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4034; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4035; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4036; GFX10-WGP-NEXT: s_endpgm 4037; 4038; GFX10-CU-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: 4039; GFX10-CU: ; %bb.0: ; %entry 4040; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4041; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4042; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4043; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4044; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4045; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4046; GFX10-CU-NEXT: s_endpgm 4047; 4048; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: 4049; SKIP-CACHE-INV: ; %bb.0: ; %entry 4050; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4051; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4052; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4053; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4054; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4055; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4056; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4057; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4058; SKIP-CACHE-INV-NEXT: s_endpgm 4059 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4060entry: 4061 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4062 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 4063 ret void 4064} 4065 4066define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg( 4067; GFX6-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: 4068; GFX6: ; %bb.0: ; %entry 4069; 
GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4070; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4071; GFX6-NEXT: s_mov_b32 m0, -1 4072; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4073; GFX6-NEXT: v_mov_b32_e32 v0, s2 4074; GFX6-NEXT: v_mov_b32_e32 v1, s1 4075; GFX6-NEXT: v_mov_b32_e32 v2, s0 4076; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4077; GFX6-NEXT: s_endpgm 4078; 4079; GFX7-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: 4080; GFX7: ; %bb.0: ; %entry 4081; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4082; GFX7-NEXT: s_mov_b32 m0, -1 4083; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4084; GFX7-NEXT: v_mov_b32_e32 v0, s0 4085; GFX7-NEXT: v_mov_b32_e32 v1, s2 4086; GFX7-NEXT: v_mov_b32_e32 v2, s1 4087; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4088; GFX7-NEXT: s_endpgm 4089; 4090; GFX10-WGP-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: 4091; GFX10-WGP: ; %bb.0: ; %entry 4092; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4093; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4094; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4095; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4096; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4097; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4098; GFX10-WGP-NEXT: s_endpgm 4099; 4100; GFX10-CU-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: 4101; GFX10-CU: ; %bb.0: ; %entry 4102; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4103; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4104; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4105; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4106; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4107; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4108; GFX10-CU-NEXT: s_endpgm 4109; 4110; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: 4111; SKIP-CACHE-INV: ; %bb.0: ; %entry 4112; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4113; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4114; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4115; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4116; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4117; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4118; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4119; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 4120; SKIP-CACHE-INV-NEXT: s_endpgm 4121 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4122entry: 4123 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4124 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 4125 ret void 4126} 4127 4128define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg( 4129; GFX6-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: 4130; GFX6: ; %bb.0: ; %entry 4131; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4132; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4133; GFX6-NEXT: s_mov_b32 m0, -1 4134; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4135; GFX6-NEXT: v_mov_b32_e32 v0, s2 4136; GFX6-NEXT: v_mov_b32_e32 v1, s1 4137; GFX6-NEXT: v_mov_b32_e32 v2, s0 4138; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4139; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4140; GFX6-NEXT: ds_write_b32 v0, v1 4141; GFX6-NEXT: s_endpgm 4142; 4143; GFX7-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: 4144; GFX7: ; %bb.0: ; %entry 4145; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4146; GFX7-NEXT: s_mov_b32 m0, -1 4147; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4148; GFX7-NEXT: v_mov_b32_e32 v0, s0 4149; GFX7-NEXT: v_mov_b32_e32 v1, s2 4150; GFX7-NEXT: v_mov_b32_e32 v2, s1 4151; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4152; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4153; GFX7-NEXT: ds_write_b32 v0, v1 4154; GFX7-NEXT: s_endpgm 4155; 4156; GFX10-WGP-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: 4157; GFX10-WGP: ; %bb.0: ; %entry 4158; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4159; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4160; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4161; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4162; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4163; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4164; GFX10-WGP-NEXT: 
s_waitcnt lgkmcnt(0) 4165; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4166; GFX10-WGP-NEXT: s_endpgm 4167; 4168; GFX10-CU-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: 4169; GFX10-CU: ; %bb.0: ; %entry 4170; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4171; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4172; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4173; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4174; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4175; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4176; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4177; GFX10-CU-NEXT: ds_write_b32 v0, v1 4178; GFX10-CU-NEXT: s_endpgm 4179; 4180; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: 4181; SKIP-CACHE-INV: ; %bb.0: ; %entry 4182; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4183; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4184; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4185; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4186; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4187; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4188; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4189; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4190; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4191; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4192; SKIP-CACHE-INV-NEXT: s_endpgm 4193 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4194entry: 4195 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4196 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic 4197 %val0 = extractvalue { i32, i1 } %val, 0 4198 store i32 %val0, i32 addrspace(3)* %out, align 4 4199 ret void 4200} 4201 4202define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg( 4203; GFX6-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4204; GFX6: ; %bb.0: ; %entry 4205; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4206; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4207; GFX6-NEXT: s_mov_b32 m0, -1 4208; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4209; 
GFX6-NEXT: v_mov_b32_e32 v0, s2 4210; GFX6-NEXT: v_mov_b32_e32 v1, s1 4211; GFX6-NEXT: v_mov_b32_e32 v2, s0 4212; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4213; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4214; GFX6-NEXT: ds_write_b32 v0, v1 4215; GFX6-NEXT: s_endpgm 4216; 4217; GFX7-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4218; GFX7: ; %bb.0: ; %entry 4219; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4220; GFX7-NEXT: s_mov_b32 m0, -1 4221; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4222; GFX7-NEXT: v_mov_b32_e32 v0, s0 4223; GFX7-NEXT: v_mov_b32_e32 v1, s2 4224; GFX7-NEXT: v_mov_b32_e32 v2, s1 4225; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4226; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4227; GFX7-NEXT: ds_write_b32 v0, v1 4228; GFX7-NEXT: s_endpgm 4229; 4230; GFX10-WGP-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4231; GFX10-WGP: ; %bb.0: ; %entry 4232; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4233; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4234; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4235; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4236; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4237; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4238; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4239; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4240; GFX10-WGP-NEXT: s_endpgm 4241; 4242; GFX10-CU-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4243; GFX10-CU: ; %bb.0: ; %entry 4244; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4245; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4246; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4247; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4248; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4249; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4250; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4251; GFX10-CU-NEXT: ds_write_b32 v0, v1 4252; GFX10-CU-NEXT: s_endpgm 4253; 4254; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: 4255; SKIP-CACHE-INV: ; %bb.0: ; %entry 4256; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 
4257; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4258; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4259; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4260; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4261; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4262; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4263; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4264; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4265; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4266; SKIP-CACHE-INV-NEXT: s_endpgm 4267 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4268entry: 4269 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4270 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic 4271 %val0 = extractvalue { i32, i1 } %val, 0 4272 store i32 %val0, i32 addrspace(3)* %out, align 4 4273 ret void 4274} 4275 4276define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg( 4277; GFX6-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4278; GFX6: ; %bb.0: ; %entry 4279; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4280; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4281; GFX6-NEXT: s_mov_b32 m0, -1 4282; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4283; GFX6-NEXT: v_mov_b32_e32 v0, s2 4284; GFX6-NEXT: v_mov_b32_e32 v1, s1 4285; GFX6-NEXT: v_mov_b32_e32 v2, s0 4286; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4287; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4288; GFX6-NEXT: ds_write_b32 v0, v1 4289; GFX6-NEXT: s_endpgm 4290; 4291; GFX7-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4292; GFX7: ; %bb.0: ; %entry 4293; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4294; GFX7-NEXT: s_mov_b32 m0, -1 4295; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4296; GFX7-NEXT: v_mov_b32_e32 v0, s0 4297; GFX7-NEXT: v_mov_b32_e32 v1, s2 4298; GFX7-NEXT: v_mov_b32_e32 v2, s1 4299; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4300; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4301; GFX7-NEXT: ds_write_b32 v0, v1 4302; GFX7-NEXT: s_endpgm 4303; 4304; 
GFX10-WGP-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4305; GFX10-WGP: ; %bb.0: ; %entry 4306; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4307; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4308; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4309; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4310; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4311; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4312; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4313; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4314; GFX10-WGP-NEXT: s_endpgm 4315; 4316; GFX10-CU-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4317; GFX10-CU: ; %bb.0: ; %entry 4318; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4319; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4320; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4321; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4322; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4323; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4324; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4325; GFX10-CU-NEXT: ds_write_b32 v0, v1 4326; GFX10-CU-NEXT: s_endpgm 4327; 4328; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: 4329; SKIP-CACHE-INV: ; %bb.0: ; %entry 4330; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4331; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4332; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4333; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4334; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4335; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4336; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4337; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4338; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4339; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4340; SKIP-CACHE-INV-NEXT: s_endpgm 4341 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4342entry: 4343 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4344 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic 4345 %val0 = extractvalue { i32, i1 } %val, 0 4346 store i32 
%val0, i32 addrspace(3)* %out, align 4 4347 ret void 4348} 4349 4350define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg( 4351; GFX6-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: 4352; GFX6: ; %bb.0: ; %entry 4353; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4354; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4355; GFX6-NEXT: s_mov_b32 m0, -1 4356; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4357; GFX6-NEXT: v_mov_b32_e32 v0, s2 4358; GFX6-NEXT: v_mov_b32_e32 v1, s1 4359; GFX6-NEXT: v_mov_b32_e32 v2, s0 4360; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4361; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4362; GFX6-NEXT: ds_write_b32 v0, v1 4363; GFX6-NEXT: s_endpgm 4364; 4365; GFX7-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: 4366; GFX7: ; %bb.0: ; %entry 4367; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4368; GFX7-NEXT: s_mov_b32 m0, -1 4369; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4370; GFX7-NEXT: v_mov_b32_e32 v0, s0 4371; GFX7-NEXT: v_mov_b32_e32 v1, s2 4372; GFX7-NEXT: v_mov_b32_e32 v2, s1 4373; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4374; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4375; GFX7-NEXT: ds_write_b32 v0, v1 4376; GFX7-NEXT: s_endpgm 4377; 4378; GFX10-WGP-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: 4379; GFX10-WGP: ; %bb.0: ; %entry 4380; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4381; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4382; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4383; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4384; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4385; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4386; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4387; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4388; GFX10-WGP-NEXT: s_endpgm 4389; 4390; GFX10-CU-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: 4391; GFX10-CU: ; %bb.0: ; %entry 4392; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4393; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4394; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4395; GFX10-CU-NEXT: v_mov_b32_e32 v1, 
s2 4396; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4397; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4398; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4399; GFX10-CU-NEXT: ds_write_b32 v0, v1 4400; GFX10-CU-NEXT: s_endpgm 4401; 4402; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: 4403; SKIP-CACHE-INV: ; %bb.0: ; %entry 4404; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4405; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4406; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4407; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4408; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4409; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4410; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4411; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4412; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4413; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4414; SKIP-CACHE-INV-NEXT: s_endpgm 4415 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4416entry: 4417 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4418 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire 4419 %val0 = extractvalue { i32, i1 } %val, 0 4420 store i32 %val0, i32 addrspace(3)* %out, align 4 4421 ret void 4422} 4423 4424define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg( 4425; GFX6-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: 4426; GFX6: ; %bb.0: ; %entry 4427; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4428; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4429; GFX6-NEXT: s_mov_b32 m0, -1 4430; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4431; GFX6-NEXT: v_mov_b32_e32 v0, s2 4432; GFX6-NEXT: v_mov_b32_e32 v1, s1 4433; GFX6-NEXT: v_mov_b32_e32 v2, s0 4434; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4435; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4436; GFX6-NEXT: ds_write_b32 v0, v1 4437; GFX6-NEXT: s_endpgm 4438; 4439; GFX7-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: 4440; GFX7: ; %bb.0: ; %entry 4441; GFX7-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 4442; GFX7-NEXT: s_mov_b32 m0, -1 4443; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4444; GFX7-NEXT: v_mov_b32_e32 v0, s0 4445; GFX7-NEXT: v_mov_b32_e32 v1, s2 4446; GFX7-NEXT: v_mov_b32_e32 v2, s1 4447; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4448; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4449; GFX7-NEXT: ds_write_b32 v0, v1 4450; GFX7-NEXT: s_endpgm 4451; 4452; GFX10-WGP-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: 4453; GFX10-WGP: ; %bb.0: ; %entry 4454; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4455; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4456; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4457; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4458; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4459; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4460; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4461; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4462; GFX10-WGP-NEXT: s_endpgm 4463; 4464; GFX10-CU-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: 4465; GFX10-CU: ; %bb.0: ; %entry 4466; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4467; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4468; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4469; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4470; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4471; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4472; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4473; GFX10-CU-NEXT: ds_write_b32 v0, v1 4474; GFX10-CU-NEXT: s_endpgm 4475; 4476; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: 4477; SKIP-CACHE-INV: ; %bb.0: ; %entry 4478; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4479; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4480; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4481; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4482; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4483; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4484; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4485; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4486; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) 4487; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4488; SKIP-CACHE-INV-NEXT: s_endpgm 4489 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4490entry: 4491 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4492 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") release acquire 4493 %val0 = extractvalue { i32, i1 } %val, 0 4494 store i32 %val0, i32 addrspace(3)* %out, align 4 4495 ret void 4496} 4497 4498define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg( 4499; GFX6-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: 4500; GFX6: ; %bb.0: ; %entry 4501; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4502; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4503; GFX6-NEXT: s_mov_b32 m0, -1 4504; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4505; GFX6-NEXT: v_mov_b32_e32 v0, s2 4506; GFX6-NEXT: v_mov_b32_e32 v1, s1 4507; GFX6-NEXT: v_mov_b32_e32 v2, s0 4508; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4509; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4510; GFX6-NEXT: ds_write_b32 v0, v1 4511; GFX6-NEXT: s_endpgm 4512; 4513; GFX7-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: 4514; GFX7: ; %bb.0: ; %entry 4515; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4516; GFX7-NEXT: s_mov_b32 m0, -1 4517; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4518; GFX7-NEXT: v_mov_b32_e32 v0, s0 4519; GFX7-NEXT: v_mov_b32_e32 v1, s2 4520; GFX7-NEXT: v_mov_b32_e32 v2, s1 4521; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4522; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4523; GFX7-NEXT: ds_write_b32 v0, v1 4524; GFX7-NEXT: s_endpgm 4525; 4526; GFX10-WGP-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: 4527; GFX10-WGP: ; %bb.0: ; %entry 4528; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4529; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4530; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4531; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4532; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4533; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4534; GFX10-WGP-NEXT: 
s_waitcnt lgkmcnt(0) 4535; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4536; GFX10-WGP-NEXT: s_endpgm 4537; 4538; GFX10-CU-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: 4539; GFX10-CU: ; %bb.0: ; %entry 4540; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4541; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4542; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4543; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4544; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4545; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4546; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4547; GFX10-CU-NEXT: ds_write_b32 v0, v1 4548; GFX10-CU-NEXT: s_endpgm 4549; 4550; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: 4551; SKIP-CACHE-INV: ; %bb.0: ; %entry 4552; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4553; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4554; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4555; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4556; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4557; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4558; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4559; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4560; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4561; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4562; SKIP-CACHE-INV-NEXT: s_endpgm 4563 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4564entry: 4565 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4566 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire 4567 %val0 = extractvalue { i32, i1 } %val, 0 4568 store i32 %val0, i32 addrspace(3)* %out, align 4 4569 ret void 4570} 4571 4572define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg( 4573; GFX6-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: 4574; GFX6: ; %bb.0: ; %entry 4575; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4576; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4577; GFX6-NEXT: s_mov_b32 m0, -1 4578; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4579; GFX6-NEXT: 
v_mov_b32_e32 v0, s2 4580; GFX6-NEXT: v_mov_b32_e32 v1, s1 4581; GFX6-NEXT: v_mov_b32_e32 v2, s0 4582; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4583; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4584; GFX6-NEXT: ds_write_b32 v0, v1 4585; GFX6-NEXT: s_endpgm 4586; 4587; GFX7-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: 4588; GFX7: ; %bb.0: ; %entry 4589; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4590; GFX7-NEXT: s_mov_b32 m0, -1 4591; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4592; GFX7-NEXT: v_mov_b32_e32 v0, s0 4593; GFX7-NEXT: v_mov_b32_e32 v1, s2 4594; GFX7-NEXT: v_mov_b32_e32 v2, s1 4595; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4596; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4597; GFX7-NEXT: ds_write_b32 v0, v1 4598; GFX7-NEXT: s_endpgm 4599; 4600; GFX10-WGP-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: 4601; GFX10-WGP: ; %bb.0: ; %entry 4602; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4603; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4604; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4605; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4606; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4607; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4608; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4609; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4610; GFX10-WGP-NEXT: s_endpgm 4611; 4612; GFX10-CU-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: 4613; GFX10-CU: ; %bb.0: ; %entry 4614; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4615; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4616; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4617; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4618; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4619; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4620; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4621; GFX10-CU-NEXT: ds_write_b32 v0, v1 4622; GFX10-CU-NEXT: s_endpgm 4623; 4624; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: 4625; SKIP-CACHE-INV: ; %bb.0: ; %entry 4626; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4627; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4628; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4629; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4630; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4631; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4632; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4633; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4634; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4635; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4636; SKIP-CACHE-INV-NEXT: s_endpgm 4637 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4638entry: 4639 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4640 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire 4641 %val0 = extractvalue { i32, i1 } %val, 0 4642 store i32 %val0, i32 addrspace(3)* %out, align 4 4643 ret void 4644} 4645 4646define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg( 4647; GFX6-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 4648; GFX6: ; %bb.0: ; %entry 4649; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 4650; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa 4651; GFX6-NEXT: s_mov_b32 m0, -1 4652; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4653; GFX6-NEXT: v_mov_b32_e32 v0, s2 4654; GFX6-NEXT: v_mov_b32_e32 v1, s1 4655; GFX6-NEXT: v_mov_b32_e32 v2, s0 4656; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4657; GFX6-NEXT: s_waitcnt lgkmcnt(0) 4658; GFX6-NEXT: ds_write_b32 v0, v1 4659; GFX6-NEXT: s_endpgm 4660; 4661; GFX7-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 4662; GFX7: ; %bb.0: ; %entry 4663; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4664; GFX7-NEXT: s_mov_b32 m0, -1 4665; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4666; GFX7-NEXT: v_mov_b32_e32 v0, s0 4667; GFX7-NEXT: v_mov_b32_e32 v1, s2 4668; GFX7-NEXT: v_mov_b32_e32 v2, s1 4669; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4670; GFX7-NEXT: s_waitcnt lgkmcnt(0) 4671; GFX7-NEXT: ds_write_b32 v0, v1 4672; GFX7-NEXT: s_endpgm 4673; 4674; GFX10-WGP-LABEL: 
local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 4675; GFX10-WGP: ; %bb.0: ; %entry 4676; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4677; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4678; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 4679; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 4680; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 4681; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4682; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) 4683; GFX10-WGP-NEXT: ds_write_b32 v0, v1 4684; GFX10-WGP-NEXT: s_endpgm 4685; 4686; GFX10-CU-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 4687; GFX10-CU: ; %bb.0: ; %entry 4688; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4689; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4690; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 4691; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 4692; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 4693; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4694; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) 4695; GFX10-CU-NEXT: ds_write_b32 v0, v1 4696; GFX10-CU-NEXT: s_endpgm 4697; 4698; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: 4699; SKIP-CACHE-INV: ; %bb.0: ; %entry 4700; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 4701; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 4702; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 4703; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4704; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 4705; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 4706; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 4707; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 4708; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 4709; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 4710; SKIP-CACHE-INV-NEXT: s_endpgm 4711 i32 addrspace(3)* %out, i32 %in, i32 %old) { 4712entry: 4713 %gep = getelementptr i32, i32 addrspace(3)* %out, i32 4 4714 %val = cmpxchg volatile i32 addrspace(3)* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst 4715 %val0 = extractvalue { i32, i1 } %val, 0 4716 store i32 %val0, i32 addrspace(3)* 
%out, align 4 4717 ret void 4718} 4719 4720