; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Overview
; --------
; Every function below reads one or more "special input" intrinsics and the
; checks pin the scalar register each value is expected to be read from when
; llc is run with the fixed function ABI option shown in the run lines above.
; Register assignments expected by the checks in this file:
;   dispatch pointer      -> s[4:5]
;   queue pointer         -> s[6:7]
;   implicit-arg pointer  -> s[8:9]
;   dispatch id           -> s[10:11]
;   workgroup id x / y / z -> s12 / s13 / s14
; The inline asm "; use $0" with an "s" constraint prints the scalar register
; holding the value, which is what the "use sNN" checks match against.

; Volatile 32-bit load through the dispatch pointer; the load is expected to
; address through s[4:5].
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
define hidden void @use_dispatch_ptr() #1 {
  %ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %ptr.i32 = bitcast i8 addrspace(4)* %ptr to i32 addrspace(4)*
  %word = load volatile i32, i32 addrspace(4)* %ptr.i32
  ret void
}

; Volatile 32-bit load through the queue pointer; the load is expected to
; address through s[6:7].
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
define hidden void @use_queue_ptr() #1 {
  %ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %ptr.i32 = bitcast i8 addrspace(4)* %ptr to i32 addrspace(4)*
  %word = load volatile i32, i32 addrspace(4)* %ptr.i32
  ret void
}

; Volatile 32-bit load through the kernarg segment pointer. The expected
; output does not read an input register pair here: it materializes a zero
; 64-bit value in an SGPR pair and loads through that pair.
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
define hidden void @use_kernarg_segment_ptr() #1 {
  %ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %ptr.i32 = bitcast i8 addrspace(4)* %ptr to i32 addrspace(4)*
  %word = load volatile i32, i32 addrspace(4)* %ptr.i32
  ret void
}

; Volatile 32-bit load through the implicit-arg pointer; the load is expected
; to address through s[8:9].
; GCN-LABEL: {{^}}use_implicitarg_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
define hidden void @use_implicitarg_ptr() #1 {
  %ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %ptr.i32 = bitcast i8 addrspace(4)* %ptr to i32 addrspace(4)*
  %word = load volatile i32, i32 addrspace(4)* %ptr.i32
  ret void
}

; The 64-bit dispatch id is fed to inline asm; it is expected in s[10:11].
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[10:11]
define hidden void @use_dispatch_id() #1 {
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %dispatch.id)
  ret void
}

; Workgroup id x is fed to inline asm; it is expected in s12.
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s12
define hidden void @use_workgroup_id_x() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  ret void
}

; Same as use_workgroup_id_x, but with a volatile store to a private
; (addrspace 5) alloca first. The store is expected to address the stack via
; s[0:3] and s32 with no immediate offset, and no mention of s32 is expected
; before it.
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s12
; GCN: s_setpc_b64
define hidden void @use_stack_workgroup_id_x() #1 {
  %slot = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %slot
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  ret void
}

; Workgroup id y is fed to inline asm; it is expected in s13.
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s13
define hidden void @use_workgroup_id_y() #1 {
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  ret void
}

; Workgroup id z is fed to inline asm; it is expected in s14.
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s14
define hidden void @use_workgroup_id_z() #1 {
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}

; Workgroup ids x and y used together; expected in s12 and s13.
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s12
; GCN: ; use s13
define hidden void @use_workgroup_id_xy() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  ret void
}

; All three workgroup ids used together; expected in s12, s13 and s14.
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_xyz() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}

; Workgroup ids x and z used together; expected in s12 and s14.
; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s12
; GCN: ; use s14
define hidden void @use_workgroup_id_xz() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}

; Workgroup ids y and z used together; expected in s13 and s14.
; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_yz() #1 {
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}

; A function that only forwards to use_workgroup_id_x. The incoming value is
; already in the register the callee reads, so s12, s13 and s14 are expected
; not to appear before the readlane that restores s4.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; A function that only forwards to use_workgroup_id_y; s4 is expected not to
; appear before the readlane that restores it.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; A function that only forwards to use_workgroup_id_z; s4 is expected not to
; appear before the readlane that restores it.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s4
; GCN: v_readlane_b32 s4, v40, 0
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Workgroup id x combined with an ordinary i32 argument. The argument is
; expected in v0 and is stored with a flat store in the kaveri run and a
; global store in the gfx900 run; the workgroup id is still expected in s12.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s12
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  ret void
}

; Same shape as other_arg_use_workgroup_id_x, for workgroup id y (s13).
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s13
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  ret void
}

; Same shape as other_arg_use_workgroup_id_x, for workgroup id z (s14).
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s14
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}

; Uses a stack slot plus the dispatch, queue and implicit-arg pointers, the
; dispatch id and all three workgroup ids in a single function. Each input is
; expected in the same register as in the single-input functions above.
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_every_sgpr_input() #1 {
  ; Private stack slot; the volatile store keeps it from being removed.
  %slot = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %slot

  ; Dispatch pointer.
  %dispatch.p = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch.p.i32 = bitcast i8 addrspace(4)* %dispatch.p to i32 addrspace(4)*
  %dispatch.word = load volatile i32, i32 addrspace(4)* %dispatch.p.i32

  ; Queue pointer.
  %queue.p = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue.p.i32 = bitcast i8 addrspace(4)* %queue.p to i32 addrspace(4)*
  %queue.word = load volatile i32, i32 addrspace(4)* %queue.p.i32

  ; Implicit-arg pointer.
  %implicit.p = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicit.p.i32 = bitcast i8 addrspace(4)* %implicit.p to i32 addrspace(4)*
  %implicit.word = load volatile i32, i32 addrspace(4)* %implicit.p.i32

  ; Dispatch id.
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %dispatch.id)

  ; Workgroup ids, each consumed right after it is read.
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)

  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)

  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)

  ret void
}

; Kernel entry point that calls use_every_sgpr_input. Before the call the
; expected output copies s14, s15 and s16 into s12, s13 and s14 and sets s32
; to 0. The trailing directive checks pin which user and system SGPR inputs
; the kernel descriptor is expected to enable.
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: s_mov_b32 s12, s14
; GCN: s_mov_b32 s13, s15
; GCN: s_mov_b32 s14, s16
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN: .amdhsa_user_sgpr_dispatch_id 1
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
; GCN: .amdhsa_user_sgpr_private_segment_size 0
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Non-kernel function that only forwards to use_every_sgpr_input. None of the
; listed input registers are expected to be mentioned before the
; s_or_saveexec_b64 that the final check matches.
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
; GCN-NOT: s14
; GCN: s_or_saveexec_b64 s[16:17], -1
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Uses every input locally and then calls use_workgroup_id_xyz. The workgroup
; id registers are expected to be untouched before the local uses, and the
; local uses are expected to precede the call.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14

; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  ; Private stack slot; the volatile store keeps it from being removed.
  %slot = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %slot

  ; Dispatch pointer.
  %dispatch.p = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch.p.i32 = bitcast i8 addrspace(4)* %dispatch.p to i32 addrspace(4)*
  %dispatch.word = load volatile i32, i32 addrspace(4)* %dispatch.p.i32

  ; Queue pointer.
  %queue.p = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue.p.i32 = bitcast i8 addrspace(4)* %queue.p to i32 addrspace(4)*
  %queue.word = load volatile i32, i32 addrspace(4)* %queue.p.i32

  ; Implicit-arg pointer (this function reads the implicit-arg intrinsic, not
  ; the kernarg segment one).
  %implicit.p = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicit.p.i32 = bitcast i8 addrspace(4)* %implicit.p to i32 addrspace(4)*
  %implicit.word = load volatile i32, i32 addrspace(4)* %implicit.p.i32

  ; Dispatch id.
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %dispatch.id)

  ; Workgroup ids, each consumed right after it is read.
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)

  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)

  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)

  ; The callee reads all three workgroup ids again.
  call void @use_workgroup_id_xyz()
  ret void
}

; Intrinsics read by the functions above.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

; #0 is attached to the intrinsic declarations (and some of their call
; sites); #1 is attached to every function defined in this file.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }