; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Checks the code generated when the workitem ID intrinsics are used inside
; callable (non-kernel) functions, and the code that kernels and functions
; emit to set those IDs up before a call.

; Callees that read one or more workitem IDs. The checks expect each ID that
; is used to arrive in consecutive VGPRs starting at v0.

; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_y:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_z:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_xy:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_xyz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  store volatile i32 %val2, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_xz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_yz:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; Kernels calling the single-ID callees. enable_vgpr_workitem_id is expected to
; cover the highest ID the callee reads, and that ID is expected to be copied
; into v0 when it does not already live there.

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, v1
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; GCN-NOT: v0
; GCN-NOT: v2
; GCN: v_mov_b32_e32 v0, v2
; GCN-NOT: v0
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}

; Functions that forward to the single-ID callees. The checks require that v0
; is not mentioned around the call.

; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}

; One explicit i32 argument plus one workitem ID. The checks expect the
; explicit argument in v0 and the workitem ID in v1.

; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
; GCN: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}


; Kernels calling the callees above with the constant 555 (0x22b) as the
; explicit argument.

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
  call void @other_arg_use_workitem_id_x(i32 555)
  ret void
}


; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
; GCN: enable_vgpr_workitem_id = 1

; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: v1
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
  call void @other_arg_use_workitem_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
; GCN: enable_vgpr_workitem_id = 2

; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: s_swappc_b64
; GCN-NOT: v0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
  call void @other_arg_use_workitem_id_z(i32 555)
  ret void
}

; 32 explicit i32 arguments plus workitem ID X. The checks expect the ID to be
; loaded from the stack (s5 offset:4) through v32, and v32 to be spilled to
; offset:8 on entry and reloaded before returning.

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
; GCN: enable_vgpr_workitem_id = 0

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
; GCN: s_mov_b32 s4, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}

; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}

; Requires loading and storing to stack slot.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: s_add_u32 s32, s32, 0x400{{$}}
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4

; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8{{$}}

; GCN: s_swappc_b64

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
  ret void
}

; stack layout:
; frame[0] = emergency stack slot
; frame[1] = byval arg32
; frame[2] = stack passed workitem ID x
; frame[3] = VGPR spill slot

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
; GCN-NEXT: s_waitcnt
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v0, off, s[0:3], s5 offset:4
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef
  %private = load volatile i32, i32 addrspace(5)* %arg32
  ret void
}

; Caller-side layout for the byval calls below.
; frame[0] = emergency stack slot
; frame[1] = %alloca holding 999 (0x3e7), the source of the byval argument

; sp[0] = callee emergency stack slot reservation
; sp[1] = byval
; sp[2] = ?? (not pinned down by the checks below; TODO confirm)
; sp[3] = stack passed workitem ID x

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; GCN: enable_vgpr_workitem_id = 0

; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0x400{{$}}

; GCN-NOT: s32
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* %alloca)
  ret void
}

; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s5 offset:4
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* %alloca)
  ret void
}

; 32 explicit i32 arguments plus all three workitem IDs. The checks expect
; X, Y and Z to be loaded from s5 offsets 4, 8 and 12 through v32, with the
; v32 spill slot at offset 16.

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32

; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_xyz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}

; frame[0] = kernel emergency stack slot
; frame[1] = callee emergency stack slot
; frame[2] = ID X
; frame[3] = ID Y
; frame[4] = ID Z

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
; GCN: enable_vgpr_workitem_id = 2

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33

; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:16
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
  call void @too_many_args_use_workitem_id_xyz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}

; workitem ID X in register, yz on stack
; v31 = workitem ID X
; frame[0] = emergency slot
; frame[1] = workitem Y
; frame[2] = workitem Z

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
; GCN: s_mov_b32 s5, s32
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:4{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:8{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31

; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
; GCN: ScratchSize: 12
define void @too_many_args_use_workitem_id_x_stack_yz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef

  ret void
}

; frame[0] = kernel emergency stack slot
; frame[1] = callee emergency stack slot
; frame[2] = ID Y
; frame[3] = ID Z

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
; GCN: enable_vgpr_workitem_id = 2

; GCN: s_mov_b32 s33, s7
; GCN: s_mov_b32 s32, s33

; GCN-DAG: v_mov_b32_e32 v31, v0
; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
  call void @too_many_args_use_workitem_id_x_stack_yz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310)
  ret void
}

; Workitem ID intrinsics and the attribute groups referenced as #0 and #1.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }