; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s

; Test end to end matching of addressing modes when MUBUF is used for
; global memory.
;
; Every function below accesses an addrspace(1) pointer built from an SGPR
; (inreg) or VGPR base plus a constant and/or variable element index. The
; element type is 4 bytes wide, so an index N shows up as byte offset N * 4
; in the expected instructions. The expected-output comment lines are tool
; output and are meant to be regenerated with the script named on the first
; line, not edited by hand.

; SGPR pointer, no offset. The pointer is copied into s[0:1] and the store
; uses no VGPR address (off) and a zero offset operand.
define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
  store i32 0, i32 addrspace(1)* %ptr
  ret void
}

; SGPR pointer plus constant index 4095 (byte offset 0x3ffc). The expected
; code puts the byte offset in s4 and passes it as the last store operand.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; SGPR pointer plus constant index 2^32 (byte offset 2^34, i.e. low dword 0
; and high dword 4). The expected code copies that 64-bit offset into v[0:1]
; and uses the addr64 form of the store.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; SGPR pointer plus constant index 2^32 + 1 (byte offset 2^34 + 4, so both
; dwords of the offset are 4). Same addr64 shape as the 2^32 case.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 4
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 4
; GFX7-NEXT: s_mov_b32 s5, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; SGPR pointer plus constant index 4096 (byte offset 0x4000), again carried
; in s4 as the last store operand.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x4000
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4096:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x4000
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; VGPR pointer plus constant index 4095. The pointer stays in v[0:1] (addr64),
; s[0:1] is zeroed, and byte offset 0x3ffc is passed in s4.
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b64 s[0:1], 0
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; VGPR pointer plus constant index 2^32. The expected code places the 64-bit
; byte offset (low 0, high 4) in s[0:1] and keeps the pointer in v[0:1].
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; VGPR pointer plus constant index 2^32 + 1. Same shape as the 2^32 case with
; both dwords of s[0:1] set to 4.
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; VGPR pointer plus constant index 4096 (byte offset 0x4000 in s4).
define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: s_movk_i32 s4, 0x4000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4096:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b64 s[0:1], 0
; GFX7-NEXT: s_movk_i32 s4, 0x4000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; SGPR pointer plus SGPR i32 index. The index is sign-extended to 64 bits
; (s_bfe_i64), scaled by 4 (shift left by 2) and then copied into v[0:1] for
; an addr64 store.
define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX6-NEXT: s_lshl_b64 s[4:5], s[2:3], 2
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX7-NEXT: s_lshl_b64 s[4:5], s[2:3], 2
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; VGPR pointer plus SGPR i32 index. The sign-extended, scaled index ends up
; in s[0:1] while the pointer stays in v[0:1].
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; VGPR pointer, SGPR index applied first and constant index 256 applied last.
; The constant is expected to fold into the immediate field (offset 1024).
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX7-NEXT: s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; Same operands as the previous function but with the constant index 256
; applied first. Here the expected code adds 0x400 to the pointer in v[0:1]
; with v_add/v_addc and the store carries no immediate offset.
define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0x400
; GFX6-NEXT: s_mov_b32 s5, 0
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s5
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_movk_i32 s4, 0x400
; GFX7-NEXT: s_mov_b32 s5, 0
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s5
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; SGPR pointer plus VGPR i32 index. The index is sign-extended (arithmetic
; shift right by 31 into v1) and scaled by 4 in v[0:1]; the pointer goes in
; s[0:1].
define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
  store i32 0, i32 addrspace(1)* %gep
  ret void
}

; SGPR pointer, VGPR index applied first and constant index 4095 applied
; last. The constant byte offset 0x3ffc is expected in s4.
define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; Same operands as the previous function but with the constant index 4095
; applied first. The expected code adds 0x3ffc to the SGPR pointer with
; s_add_u32/s_addc_u32 and passes a zero offset operand to the store.
define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_addc_u32 s1, s3, 0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_addc_u32 s1, s3, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
  %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
  %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
  store i32 0, i32 addrspace(1)* %gep1
  ret void
}

; Volatile float load through an SGPR pointer with no offset. The load uses
; no VGPR address (off) and is followed by a wait on vmcnt before returning.
define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %val = load volatile float, float addrspace(1)* %ptr
  ret float %val
}

; Load through an SGPR pointer plus constant index 4095 (byte offset 0x3ffc
; in s4).
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], s4
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through an SGPR pointer plus constant index 2^32. The 64-bit byte
; offset (low 0, high 4) is copied into v[0:1] for an addr64 load.
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through an SGPR pointer plus constant index 2^32 + 1 (both dwords of
; the byte offset are 4).
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 4
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 4
; GFX7-NEXT: s_mov_b32 s5, s4
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through an SGPR pointer plus constant index 4096 (byte offset 0x4000
; in s4).
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x4000
; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], s4
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4096:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x4000
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through a VGPR pointer plus constant index 4095. The pointer stays in
; v[0:1] (addr64), s[0:1] is zeroed, and byte offset 0x3ffc is in s4.
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b64 s[0:1], 0
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through a VGPR pointer plus constant index 2^32. The 64-bit byte
; offset (low 0, high 4) is expected in s[0:1].
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 0
; GFX6-NEXT: s_mov_b32 s1, 4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s0
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0
; GFX7-NEXT: s_mov_b32 s1, 4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s0
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through a VGPR pointer plus constant index 2^32 + 1 (both dwords of
; s[0:1] are 4).
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through a VGPR pointer plus constant index 4096 (byte offset 0x4000
; in s4).
define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: s_movk_i32 s4, 0x4000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4096:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b64 s[0:1], 0
; GFX7-NEXT: s_movk_i32 s4, 0x4000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through an SGPR pointer plus SGPR i32 index. The index is
; sign-extended, scaled by 4 and copied into v[0:1] for an addr64 load.
define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX6-NEXT: s_lshl_b64 s[4:5], s[2:3], 2
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX7-NEXT: s_lshl_b64 s[4:5], s[2:3], 2
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: v_mov_b32_e32 v1, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through a VGPR pointer plus SGPR i32 index. The sign-extended, scaled
; index ends up in s[0:1] while the pointer stays in v[0:1].
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through a VGPR pointer, SGPR index applied first and constant index
; 256 applied last. The constant is expected in the immediate field (offset
; 1024).
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; Same operands as the previous function but with the constant index 256
; applied first. The expected code adds 0x400 to the pointer in v[0:1] with
; v_add/v_addc and the load carries no immediate offset.
define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_movk_i32 s4, 0x400
; GFX6-NEXT: s_mov_b32 s5, 0
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s5
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_movk_i32 s4, 0x400
; GFX7-NEXT: s_mov_b32 s5, 0
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s5
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; Load through an SGPR pointer plus VGPR i32 index. The index is
; sign-extended and scaled by 4 in v[0:1]; the pointer goes in s[0:1].
define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
  %val = load volatile float, float addrspace(1)* %gep
  ret float %val
}

; Load through an SGPR pointer, VGPR index applied first and constant index
; 4095 applied last. The constant byte offset 0x3ffc is expected in s4.
define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; Same operands as the previous function but with the constant index 4095
; applied first. The expected code adds 0x3ffc to the SGPR pointer with
; s_add_u32/s_addc_u32 (result in s[4:5]) and the load uses s[4:7].
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_addc_u32 s5, s3, 0
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_addc_u32 s5, s3, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %voffset
  %val = load volatile float, float addrspace(1)* %gep1
  ret float %val
}

; seq_cst atomicrmw add of 2 through an SGPR pointer plus constant index 4095.
; The expected code is a buffer_atomic_add with glc and byte offset 0x3ffc in
; s4, with an s_waitcnt before and after it and a buffer_wbinvl1 afterwards.
; The first expectation set additionally ends with a wait on expcnt.
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 2
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 2
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_wbinvl1
; GFX7-NEXT: ; return to shader part epilog
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
  %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
  %cast = bitcast i32 %result to float
  ret float %cast
}

define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s4, 0
; GFX6-NEXT: s_mov_b32 s5, 4
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: v_mov_b32_e32 v0, 2
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s4, 0
; GFX7-NEXT: s_mov_b32 s5, 4
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: v_mov_b32_e32 v0, 2
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, s4
; GFX7-NEXT: v_mov_b32_e32 v2, s5
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
;
GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 988; GFX7-NEXT: buffer_wbinvl1 989; GFX7-NEXT: ; return to shader part epilog 990 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 991 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 992 %cast = bitcast i32 %result to float 993 ret float %cast 994} 995 996define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { 997; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: 998; GFX6: ; %bb.0: 999; GFX6-NEXT: v_mov_b32_e32 v2, 2 1000; GFX6-NEXT: s_mov_b32 s2, 0 1001; GFX6-NEXT: s_mov_b32 s3, 0xf000 1002; GFX6-NEXT: s_mov_b64 s[0:1], 0 1003; GFX6-NEXT: s_movk_i32 s4, 0x3ffc 1004; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1005; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc 1006; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1007; GFX6-NEXT: buffer_wbinvl1 1008; GFX6-NEXT: v_mov_b32_e32 v0, v2 1009; GFX6-NEXT: s_waitcnt expcnt(0) 1010; GFX6-NEXT: ; return to shader part epilog 1011; 1012; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: 1013; GFX7: ; %bb.0: 1014; GFX7-NEXT: v_mov_b32_e32 v2, 2 1015; GFX7-NEXT: s_mov_b32 s2, 0 1016; GFX7-NEXT: s_mov_b32 s3, 0xf000 1017; GFX7-NEXT: s_mov_b64 s[0:1], 0 1018; GFX7-NEXT: s_movk_i32 s4, 0x3ffc 1019; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1020; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc 1021; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1022; GFX7-NEXT: buffer_wbinvl1 1023; GFX7-NEXT: v_mov_b32_e32 v0, v2 1024; GFX7-NEXT: ; return to shader part epilog 1025 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 1026 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 1027 %cast = bitcast i32 %result to float 1028 ret float %cast 1029} 1030 1031define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { 1032; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: 1033; GFX6: ; %bb.0: 1034; GFX6-NEXT: s_mov_b32 s0, 0 1035; GFX6-NEXT: s_mov_b32 s1, 4 1036; 
GFX6-NEXT: v_mov_b32_e32 v2, 2 1037; GFX6-NEXT: s_mov_b32 s3, 0xf000 1038; GFX6-NEXT: s_mov_b32 s2, s0 1039; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1040; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1041; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1042; GFX6-NEXT: buffer_wbinvl1 1043; GFX6-NEXT: v_mov_b32_e32 v0, v2 1044; GFX6-NEXT: s_waitcnt expcnt(0) 1045; GFX6-NEXT: ; return to shader part epilog 1046; 1047; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: 1048; GFX7: ; %bb.0: 1049; GFX7-NEXT: s_mov_b32 s0, 0 1050; GFX7-NEXT: s_mov_b32 s1, 4 1051; GFX7-NEXT: v_mov_b32_e32 v2, 2 1052; GFX7-NEXT: s_mov_b32 s3, 0xf000 1053; GFX7-NEXT: s_mov_b32 s2, s0 1054; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1055; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1056; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1057; GFX7-NEXT: buffer_wbinvl1 1058; GFX7-NEXT: v_mov_b32_e32 v0, v2 1059; GFX7-NEXT: ; return to shader part epilog 1060 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 1061 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 1062 %cast = bitcast i32 %result to float 1063 ret float %cast 1064} 1065 1066define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { 1067; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset: 1068; GFX6: ; %bb.0: 1069; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1070; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1071; GFX6-NEXT: s_mov_b32 s0, s2 1072; GFX6-NEXT: s_mov_b32 s1, s3 1073; GFX6-NEXT: v_mov_b32_e32 v2, 2 1074; GFX6-NEXT: s_mov_b32 s2, 0 1075; GFX6-NEXT: s_mov_b32 s3, 0xf000 1076; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1077; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1078; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1079; GFX6-NEXT: buffer_wbinvl1 1080; GFX6-NEXT: v_mov_b32_e32 v0, v2 1081; GFX6-NEXT: s_waitcnt expcnt(0) 1082; GFX6-NEXT: ; return to shader part epilog 1083; 1084; GFX7-LABEL: 
mubuf_atomicrmw_sgpr_ptr_vgpr_offset: 1085; GFX7: ; %bb.0: 1086; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1087; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1088; GFX7-NEXT: s_mov_b32 s0, s2 1089; GFX7-NEXT: s_mov_b32 s1, s3 1090; GFX7-NEXT: v_mov_b32_e32 v2, 2 1091; GFX7-NEXT: s_mov_b32 s2, 0 1092; GFX7-NEXT: s_mov_b32 s3, 0xf000 1093; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1094; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc 1095; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1096; GFX7-NEXT: buffer_wbinvl1 1097; GFX7-NEXT: v_mov_b32_e32 v0, v2 1098; GFX7-NEXT: ; return to shader part epilog 1099 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset 1100 %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst 1101 %cast = bitcast i32 %result to float 1102 ret float %cast 1103} 1104 1105define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { 1106; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: 1107; GFX6: ; %bb.0: 1108; GFX6-NEXT: s_mov_b32 s0, s2 1109; GFX6-NEXT: s_mov_b32 s1, s3 1110; GFX6-NEXT: v_mov_b32_e32 v2, v0 1111; GFX6-NEXT: s_mov_b32 s2, -1 1112; GFX6-NEXT: s_mov_b32 s3, 0xf000 1113; GFX6-NEXT: s_movk_i32 s4, 0x3ffc 1114; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1115; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc 1116; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1117; GFX6-NEXT: buffer_wbinvl1 1118; GFX6-NEXT: v_mov_b32_e32 v0, v1 1119; GFX6-NEXT: s_waitcnt expcnt(0) 1120; GFX6-NEXT: ; return to shader part epilog 1121; 1122; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: 1123; GFX7: ; %bb.0: 1124; GFX7-NEXT: s_mov_b32 s0, s2 1125; GFX7-NEXT: s_mov_b32 s1, s3 1126; GFX7-NEXT: v_mov_b32_e32 v2, v0 1127; GFX7-NEXT: s_mov_b32 s2, -1 1128; GFX7-NEXT: s_mov_b32 s3, 0xf000 1129; GFX7-NEXT: s_movk_i32 s4, 0x3ffc 1130; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1131; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc 1132; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1133; 
GFX7-NEXT: buffer_wbinvl1 1134; GFX7-NEXT: v_mov_b32_e32 v0, v1 1135; GFX7-NEXT: ; return to shader part epilog 1136 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 1137 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1138 %result = extractvalue { i32, i1 } %result.struct, 0 1139 %cast = bitcast i32 %result to float 1140 ret float %cast 1141} 1142 1143define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { 1144; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: 1145; GFX6: ; %bb.0: 1146; GFX6-NEXT: s_mov_b32 s4, 0 1147; GFX6-NEXT: s_mov_b32 s5, 4 1148; GFX6-NEXT: v_mov_b32_e32 v3, s4 1149; GFX6-NEXT: s_mov_b32 s0, s2 1150; GFX6-NEXT: s_mov_b32 s1, s3 1151; GFX6-NEXT: v_mov_b32_e32 v2, v0 1152; GFX6-NEXT: s_mov_b32 s3, 0xf000 1153; GFX6-NEXT: s_mov_b32 s2, s4 1154; GFX6-NEXT: v_mov_b32_e32 v4, s5 1155; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1156; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc 1157; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1158; GFX6-NEXT: buffer_wbinvl1 1159; GFX6-NEXT: v_mov_b32_e32 v0, v1 1160; GFX6-NEXT: s_waitcnt expcnt(0) 1161; GFX6-NEXT: ; return to shader part epilog 1162; 1163; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: 1164; GFX7: ; %bb.0: 1165; GFX7-NEXT: s_mov_b32 s4, 0 1166; GFX7-NEXT: s_mov_b32 s5, 4 1167; GFX7-NEXT: v_mov_b32_e32 v3, s4 1168; GFX7-NEXT: s_mov_b32 s0, s2 1169; GFX7-NEXT: s_mov_b32 s1, s3 1170; GFX7-NEXT: v_mov_b32_e32 v2, v0 1171; GFX7-NEXT: s_mov_b32 s3, 0xf000 1172; GFX7-NEXT: s_mov_b32 s2, s4 1173; GFX7-NEXT: v_mov_b32_e32 v4, s5 1174; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1175; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc 1176; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1177; GFX7-NEXT: buffer_wbinvl1 1178; GFX7-NEXT: v_mov_b32_e32 v0, v1 1179; GFX7-NEXT: ; return to shader part epilog 1180 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 
4294967296 1181 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1182 %result = extractvalue { i32, i1 } %result.struct, 0 1183 %cast = bitcast i32 %result to float 1184 ret float %cast 1185} 1186 1187define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { 1188; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: 1189; GFX6: ; %bb.0: 1190; GFX6-NEXT: v_mov_b32_e32 v4, v2 1191; GFX6-NEXT: s_mov_b32 s2, 0 1192; GFX6-NEXT: s_mov_b32 s3, 0xf000 1193; GFX6-NEXT: s_mov_b64 s[0:1], 0 1194; GFX6-NEXT: s_movk_i32 s4, 0x3ffc 1195; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1196; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc 1197; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1198; GFX6-NEXT: buffer_wbinvl1 1199; GFX6-NEXT: v_mov_b32_e32 v0, v3 1200; GFX6-NEXT: s_waitcnt expcnt(0) 1201; GFX6-NEXT: ; return to shader part epilog 1202; 1203; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: 1204; GFX7: ; %bb.0: 1205; GFX7-NEXT: v_mov_b32_e32 v4, v2 1206; GFX7-NEXT: s_mov_b32 s2, 0 1207; GFX7-NEXT: s_mov_b32 s3, 0xf000 1208; GFX7-NEXT: s_mov_b64 s[0:1], 0 1209; GFX7-NEXT: s_movk_i32 s4, 0x3ffc 1210; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1211; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc 1212; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1213; GFX7-NEXT: buffer_wbinvl1 1214; GFX7-NEXT: v_mov_b32_e32 v0, v3 1215; GFX7-NEXT: ; return to shader part epilog 1216 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 1217 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1218 %result = extractvalue { i32, i1 } %result.struct, 0 1219 %cast = bitcast i32 %result to float 1220 ret float %cast 1221} 1222 1223define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { 1224; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: 1225; GFX6: ; %bb.0: 1226; GFX6-NEXT: s_mov_b32 s0, 0 1227; 
GFX6-NEXT: v_mov_b32_e32 v4, v2 1228; GFX6-NEXT: s_mov_b32 s1, 4 1229; GFX6-NEXT: s_mov_b32 s3, 0xf000 1230; GFX6-NEXT: s_mov_b32 s2, s0 1231; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1232; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc 1233; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1234; GFX6-NEXT: buffer_wbinvl1 1235; GFX6-NEXT: v_mov_b32_e32 v0, v3 1236; GFX6-NEXT: s_waitcnt expcnt(0) 1237; GFX6-NEXT: ; return to shader part epilog 1238; 1239; GFX7-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: 1240; GFX7: ; %bb.0: 1241; GFX7-NEXT: s_mov_b32 s0, 0 1242; GFX7-NEXT: v_mov_b32_e32 v4, v2 1243; GFX7-NEXT: s_mov_b32 s1, 4 1244; GFX7-NEXT: s_mov_b32 s3, 0xf000 1245; GFX7-NEXT: s_mov_b32 s2, s0 1246; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1247; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc 1248; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1249; GFX7-NEXT: buffer_wbinvl1 1250; GFX7-NEXT: v_mov_b32_e32 v0, v3 1251; GFX7-NEXT: ; return to shader part epilog 1252 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 1253 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1254 %result = extractvalue { i32, i1 } %result.struct, 0 1255 %cast = bitcast i32 %result to float 1256 ret float %cast 1257} 1258 1259define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) { 1260; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset: 1261; GFX6: ; %bb.0: 1262; GFX6-NEXT: v_mov_b32_e32 v3, v1 1263; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1264; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1265; GFX6-NEXT: s_mov_b32 s0, s2 1266; GFX6-NEXT: s_mov_b32 s1, s3 1267; GFX6-NEXT: s_mov_b32 s2, 0 1268; GFX6-NEXT: s_mov_b32 s3, 0xf000 1269; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1270; GFX6-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc 1271; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1272; GFX6-NEXT: buffer_wbinvl1 1273; GFX6-NEXT: 
v_mov_b32_e32 v0, v2 1274; GFX6-NEXT: s_waitcnt expcnt(0) 1275; GFX6-NEXT: ; return to shader part epilog 1276; 1277; GFX7-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset: 1278; GFX7: ; %bb.0: 1279; GFX7-NEXT: v_mov_b32_e32 v3, v1 1280; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1281; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 1282; GFX7-NEXT: s_mov_b32 s0, s2 1283; GFX7-NEXT: s_mov_b32 s1, s3 1284; GFX7-NEXT: s_mov_b32 s2, 0 1285; GFX7-NEXT: s_mov_b32 s3, 0xf000 1286; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1287; GFX7-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc 1288; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1289; GFX7-NEXT: buffer_wbinvl1 1290; GFX7-NEXT: v_mov_b32_e32 v0, v2 1291; GFX7-NEXT: ; return to shader part epilog 1292 %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset 1293 %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst 1294 %result = extractvalue { i32, i1 } %result.struct, 0 1295 %cast = bitcast i32 %result to float 1296 ret float %cast 1297} 1298