; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s ; This used to fail due to a v_add_i32 instruction with an illegal immediate ; operand that was created during Local Stack Slot Allocation. Test case derived ; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 ; ; GCN-LABEL: {{^}}ps_main: ; GCN-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; GCN-NOT: s_mov_b32 s0 ; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] ; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]] ; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]] ; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, s0 offen define amdgpu_ps float @ps_main(i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } ; GCN-LABEL: {{^}}vs_main: ; GCN-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; GCN-NOT: s_mov_b32 s0 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen define amdgpu_vs float @vs_main(i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } ; GCN-LABEL: {{^}}cs_main: ; GCN-DAG: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen define amdgpu_cs float @cs_main(i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } ; GCN-LABEL: {{^}}hs_main: ; SI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; SI-NOT: s_mov_b32 s0 ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; GFX9: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 ; GFX9-NOT: s_mov_b32 s5 ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen define amdgpu_hs float @hs_main(i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } ; GCN-LABEL: {{^}}gs_main: ; SI: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s0 offen ; GFX9: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen define amdgpu_gs float @gs_main(i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } ; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset: ; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; SI-NOT: s_mov_b32 s6 ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen ; GFX9-NOT: s_mov_b32 s5 ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen ; GCN: s_mov_b32 s2, s5 define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %f = fadd float %v1, %v2 %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 ret <{i32, i32, i32, float}> %r2 } ; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset: ; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen ; SI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s6 offen ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen ; GFX9: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s5 offen ; GCN: s_mov_b32 s2, s5 define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %f = fadd float %v1, %v2 %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 ret <{i32, i32, i32, float}> %r2 }