; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
define hidden void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
define hidden void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x10
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define hidden void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1
; CIVI-NOT: s[4:5]
; CIVI-NOT: s4
; CIVI-NOT: s5
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; Not really supported in callable functions.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}}
define hidden void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[4:5]
define hidden void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s4
define hidden void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s4
; GCN: s_setpc_b64
define hidden void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s4
define hidden void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s4
define hidden void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s4
; GCN: ; use s5
define hidden void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s4
; GCN: ; use s5
; GCN: ; use s6
define hidden void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s4
; GCN: ; use s5
define hidden void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s4
; GCN: ; use s5
define hidden void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s6
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+12
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s4, s7
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s4, s7

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s5, s7
; GCN: s_mov_b32 s4, s6

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s4, s6
; GCN: s_mov_b32 s5, s7
; GCN: s_mov_b32 s6, s8

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s5, s7
; GCN: s_mov_b32 s4, s6

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s4, s7
; GCN: s_mov_b32 s5, s8

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s4
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s4
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s4
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s6

; GCN-DAG: s_mov_b32 s32, 0
; GCN-NOT: s4
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s7

; GCN-DAG: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]

; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s12, s14
; GCN: s_mov_b32 s13, s15
; GCN: s_mov_b32 s14, s16
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
; GCN: s_or_saveexec_b64 s[16:17], -1
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN: s_mov_b32 s4, s12
; GCN: s_mov_b32 s5, s13
; GCN: s_mov_b32 s6, s14
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14

; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]

; GCN: s_mov_b32 s4, s12
; GCN: s_mov_b32 s5, s13
; GCN: s_mov_b32 s6, s14

; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9]

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14

; GCN: s_swappc_b64

; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_X]]:[[HI_X]]{{\]}}, 0x0
; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Y]]:[[HI_Y]]{{\]}}, 0x0
; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Z]]:[[HI_Z]]{{\]}}, 0x0
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }