; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s

; This test checks the register counts (NumSgprs / NumVgprs), scratch size and
; is_dynamic_callstack value reported for functions and for kernels that call
; them, directly or through one more level of calls.
;
; Make sure to also run on a GPU with the SGPR allocation bug; the iceland RUN
; line above covers that case through the VI-BUG prefix.

; A function that only clobbers vcc.
; GCN-LABEL: {{^}}use_vcc:
; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 0
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}

; A function that calls @use_vcc; s33 and the return address (s30, s31) are
; saved to and restored from lanes of v40 around the call.
; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: v_writelane_b32 v40, s33, 2
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s4, v40, 0
; GCN: v_readlane_b32 s5, v40, 1
; GCN: v_readlane_b32 s33, v40, 2
; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 41
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}

; A kernel two call levels above the vcc clobber.
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_vcc()
  ret void
}

; A function that only clobbers flat_scratch.
; GCN-LABEL: {{^}}use_flat_scratch:
; CI: ; NumSgprs: 36
; VI: ; NumSgprs: 38
; GCN: ; NumVgprs: 0
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; CI: ; NumSgprs: 38
; VI: ; NumSgprs: 40
; GCN: ; NumVgprs: 41
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}

; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}

; A function that clobbers v0-v9.
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: ; NumVgprs: 10
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: ; NumVgprs: 41
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: is_dynamic_callstack = 0
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}

; A function that clobbers v49.
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}

; A function that clobbers s79.
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: ; NumSgprs: 80
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: ; NumSgprs: 82
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 84
; VI-NOBUG: ; NumSgprs: 86
; VI-BUG: ; NumSgprs: 96
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}


; A function with a [512 x i32] private (addrspace(5)) alloca.
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
  ret void
}

; A function with a [100 x i32] private (addrspace(5)) alloca.
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %alloca) #0
  ret void
}

; A function with its own [16 x i32] alloca that also calls @use_stack0.
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2132
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %alloca) #0
  call void @use_stack0()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}


; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}


declare void @external() #0

; A kernel that calls a function with no visible body. The register-count
; checks use the same expectations as the count_use_*_external_call kernels
; in this file, which also call @external.
; GCN-LABEL: {{^}}usage_external:
; GCN: is_dynamic_callstack = 1
; CI: NumSgprs: 48
; VI-NOBUG: NumSgprs: 48
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 24
; GCN: ScratchSize: 16384
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}

declare void @external_recurse() #2

; Same as usage_external, but the external callee is not marked norecurse.
; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: is_dynamic_callstack = 1
; CI: NumSgprs: 48
; VI-NOBUG: NumSgprs: 48
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 24
; GCN: ScratchSize: 16384
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}

; A directly recursive function with a [512 x i32] private alloca.
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 2064
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}

; A kernel that calls the recursive function above.
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
; GCN: workitem_private_segment_byte_size = 2064
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}

; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call:
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 48
; VI-NOBUG: NumSgprs: 48
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 24
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call:
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 48
; VI-NOBUG: NumSgprs: 48
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 24
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call:
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 48
; VI-NOBUG: NumSgprs: 48
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 24
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
attributes #2 = { nounwind noinline }