1; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s 2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s 3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX9_10,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s 4; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -filetype=obj -amdgpu-use-divergent-register-indexing < %s | llvm-readobj -r - | FileCheck --check-prefix=RELS %s 5; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W32,GFX9_10,MUBUF,GFX10_W32-MUBUF,GFX9_10-MUBUF %s 6; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global,+wavefrontsize64 -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W64,GFX9_10,MUBUF,GFX10_W64-MUBUF,GFX9_10-MUBUF %s 7; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX9_10,FLATSCR,GFX9-FLATSCR %s 8; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1030 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W32,GFX9_10,FLATSCR,GFX10-FLATSCR,GFX9_10-FLATSCR %s 9; RUN: llc -march=amdgcn -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX9_10,FLATSCR,GFX9-FLATSCR-PAL %s 10; RUN: llc -march=amdgcn -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W32,GFX9_10,FLATSCR,GFX10-FLATSCR-PAL,GFX9_10-FLATSCR %s 11 12; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 0x0 13; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 0x0 14 15; This used to fail due to a v_add_i32 instruction with an illegal immediate 16; operand that was created during Local Stack Slot Allocation. Test case derived 17; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 18; 19; GCN-LABEL: {{^}}ps_main: 20 21; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2 22; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0 23; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 24 25; GFX10-FLATSCR: s_add_u32 s0, s0, s2 26; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 27; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 28; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 29 30; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] 31; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 32; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 33; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 34; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 35; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 36; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 37; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff 38; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 39; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 40; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 41 42; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] 43; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 44; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0 45; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 46; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff 47; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 48; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 49; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 50; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 51 52; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 53; MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 54; MUBUF-DAG: s_mov_b32 s2, -1 55; SI-DAG: s_mov_b32 s3, 0xe8f000 56; VI-DAG: s_mov_b32 s3, 0xe80000 57; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000 58; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000 59; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000 60 61; FLATSCR-NOT: SCRATCH_RSRC_DWORD 62 63; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0 64; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset: 65 66; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset: 67 68; MUBUF-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 69; MUBUF-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] 70; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 71; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 72; GCN-NOT: s_mov_b32 s0 73 74; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]] 75; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]] 76 77; MUBUF: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen 78; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen 79; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off 80define amdgpu_ps float @ps_main(i32 %idx) { 81 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 82 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 83 %r = fadd float %v1, %v2 84 ret float %r 85} 86 87; GCN-LABEL: {{^}}vs_main: 88; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2 89; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 90 91; GFX10-FLATSCR: s_add_u32 s0, s0, s2 92; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 93; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 94; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 95 96; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] 97; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 98; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 99; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 100; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 101; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 102; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 103; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff 104; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 105; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 106 107; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] 108; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 109; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0 110; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 111; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff 112; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 113; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 114; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 115; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 116 117; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 118; GCN-NOT: s_mov_b32 s0 119 120; FLATSCR-NOT: SCRATCH_RSRC_DWORD 121 122; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 123; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 124 125; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0 126; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset: 127 128; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 129; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 130 131define amdgpu_vs float @vs_main(i32 %idx) { 132 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 133 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 134 %r = fadd float %v1, %v2 135 ret float %r 136} 137 138; GCN-LABEL: {{^}}cs_main: 139; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2 140; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 141 142; GFX10-FLATSCR: s_add_u32 s0, s0, s2 143; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 144; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 145; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 146 147; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] 148; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 149; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10 150; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 151; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 152; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 153; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 154; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff 155; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 156; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 157 158; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] 159; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 160; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10 161; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 162; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff 163; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 164; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 165; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 166; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 167 168; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 169 170; FLATSCR-NOT: SCRATCH_RSRC_DWORD 171 172; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 173; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 174 175; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 176; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 177define amdgpu_cs float @cs_main(i32 %idx) { 178 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 179 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 180 %r = fadd float %v1, %v2 181 ret float %r 182} 183 184; GCN-LABEL: {{^}}hs_main: 185; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 186; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 187 188; GFX10-FLATSCR: s_add_u32 s0, s0, s5 189; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 190; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 191; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 192 193; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 194; SIVI-NOT: s_mov_b32 s0 195; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 196; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 197 198; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 199; GFX9_10-NOT: s_mov_b32 s5 200; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 201; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 202 203; FLATSCR-NOT: SCRATCH_RSRC_DWORD 204; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 205; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 206define amdgpu_hs float @hs_main(i32 %idx) { 207 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 208 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 209 %r = fadd float %v1, %v2 210 ret float %r 211} 212 213; GCN-LABEL: {{^}}gs_main: 214; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 215; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 216 217; GFX10-FLATSCR: s_add_u32 s0, s0, s5 218; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 219; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 220; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 221 222; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] 223; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 224; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 225; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 226; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 227; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 228; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 229; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff 230; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 231; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 232 233; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] 234; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 235; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 236; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 237; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff 238; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 239; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 240; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 241; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 242 243; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 244; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 245; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 246 247; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 248; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 249; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 250 251; FLATSCR-NOT: SCRATCH_RSRC_DWORD 252; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 253; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 254define amdgpu_gs float @gs_main(i32 %idx) { 255 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 256 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 257 %r = fadd float %v1, %v2 258 ret float %r 259} 260 261; Mesa GS and HS shaders have the preloaded scratch wave offset SGPR fixed at 262; SGPR5, and the inreg implementation is used to reference it in the IR. The 263; following tests confirm the shader and anything inserted after the return 264; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset. 265 266; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset: 267; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 268; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 269 270; GFX10-FLATSCR: s_add_u32 s0, s0, s5 271; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 272; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 273; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 274 275; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] 276; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 277; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 278; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 279; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 280; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 281; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 282; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff 283; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 284; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 285 286; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] 287; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 288; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 289; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 290; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff 291; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 292; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 293; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 294; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 295 296; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 297; FLATSCR-NOT: SCRATCH_RSRC_DWORD 298 299; SIVI-NOT: s_mov_b32 s6 300; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 301; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 302 303; GFX9_10-NOT: s_mov_b32 s5 304; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 305; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 306 307; MUBUF-DAG: s_mov_b32 s2, s5 308 309; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 310; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 311define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { 312 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 313 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 314 %f = fadd float %v1, %v2 315 %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 316 %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 317 ret <{i32, i32, i32, float}> %r2 318} 319 320; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset: 321; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 322; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 323 324; GFX10-FLATSCR: s_add_u32 s0, s0, s5 325; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 326; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 327; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 328 329; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] 330; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 331; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 332; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 333; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 334; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 335; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) 336; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff 337; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 338; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 339 340; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] 341; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 342; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 343; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) 344; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff 345; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 346; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 347; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 348; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 349 350; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 351; FLATSCR-NOT: SCRATCH_RSRC_DWORD 352 353; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 354; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 355 356; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 357; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen 358 359; MUBUF-DAG: s_mov_b32 s2, s5 360 361; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 362; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off 363define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { 364 %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx 365 %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx 366 %f = fadd float %v1, %v2 367 %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 368 %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 369 ret <{i32, i32, i32, float}> %r2 370} 371