1; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX9_10,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s
4; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -filetype=obj -amdgpu-use-divergent-register-indexing < %s | llvm-readobj -r - | FileCheck --check-prefix=RELS %s
5; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W32,GFX9_10,MUBUF,GFX10_W32-MUBUF,GFX9_10-MUBUF %s
6; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global,+wavefrontsize64 -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W64,GFX9_10,MUBUF,GFX10_W64-MUBUF,GFX9_10-MUBUF %s
7; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX9_10,FLATSCR,GFX9-FLATSCR %s
8; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx1030 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W32,GFX9_10,FLATSCR,GFX10-FLATSCR,GFX9_10-FLATSCR %s
9; RUN: llc -march=amdgcn -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX9_10,FLATSCR,GFX9-FLATSCR-PAL %s
10; RUN: llc -march=amdgcn -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -amdgpu-enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10_W32,GFX9_10,FLATSCR,GFX10-FLATSCR-PAL,GFX9_10-FLATSCR %s
11
12; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 0x0
13; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 0x0
14
15; This used to fail due to a v_add_i32 instruction with an illegal immediate
16; operand that was created during Local Stack Slot Allocation. Test case derived
17; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
18;
19; GCN-LABEL: {{^}}ps_main:
20
21; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2
22; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0
23; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
24
25; GFX10-FLATSCR: s_add_u32 s0, s0, s2
26; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
27; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
28; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
29
30; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
31; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
32; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
33; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
34; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
35; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
36; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
37; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
38; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
39; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
40; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
41
42; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
43; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
44; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
45; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
46; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
47; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
48; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
49; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
50; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
51
52; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
53; MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1
54; MUBUF-DAG: s_mov_b32 s2, -1
55; SI-DAG: s_mov_b32 s3, 0xe8f000
56; VI-DAG: s_mov_b32 s3, 0xe80000
57; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000
58; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000
59; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000
60
61; FLATSCR-NOT: SCRATCH_RSRC_DWORD
62
63; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
64; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
65
66; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:
67
68; MUBUF-DAG:     v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
69; MUBUF-DAG:     v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
70; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
71; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
72; GCN-NOT: s_mov_b32 s0
73
74; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]]
75; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]]
76
77; MUBUF: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
78; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
79; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off
80define amdgpu_ps float @ps_main(i32 %idx) {
81  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
82  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
83  %r = fadd float %v1, %v2
84  ret float %r
85}
86
87; GCN-LABEL: {{^}}vs_main:
88; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
89; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
90
91; GFX10-FLATSCR: s_add_u32 s0, s0, s2
92; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
93; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
94; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
95
96; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
97; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
98; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0
99; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
100; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
101; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
102; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
103; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
104; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
105; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
106
107; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
108; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
109; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0
110; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
111; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
112; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
113; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
114; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
115; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
116
117; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
118; GCN-NOT: s_mov_b32 s0
119
120; FLATSCR-NOT: SCRATCH_RSRC_DWORD
121
122; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
123; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
124
125; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
126; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
127
128; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
129; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
130
131define amdgpu_vs float @vs_main(i32 %idx) {
132  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
133  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
134  %r = fadd float %v1, %v2
135  ret float %r
136}
137
138; GCN-LABEL: {{^}}cs_main:
139; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
140; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
141
142; GFX10-FLATSCR: s_add_u32 s0, s0, s2
143; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
144; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
145; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
146
147; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3]
148; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0
149; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10
150; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
151; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
152; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
153; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
154; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff
155; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0
156; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0
157
158; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3]
159; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0
160; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10
161; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
162; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff
163; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0
164; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0
165; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
166; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
167
168; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
169
170; FLATSCR-NOT: SCRATCH_RSRC_DWORD
171
172; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
173; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
174
175; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
176; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
177define amdgpu_cs float @cs_main(i32 %idx) {
178  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
179  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
180  %r = fadd float %v1, %v2
181  ret float %r
182}
183
184; GCN-LABEL: {{^}}hs_main:
185; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
186; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
187
188; GFX10-FLATSCR: s_add_u32 s0, s0, s5
189; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
190; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
191; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
192
193; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
194; SIVI-NOT: s_mov_b32 s0
195; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
196; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
197
198; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
199; GFX9_10-NOT:   s_mov_b32 s5
200; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
201; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
202
203; FLATSCR-NOT: SCRATCH_RSRC_DWORD
204; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
205; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
206define amdgpu_hs float @hs_main(i32 %idx) {
207  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
208  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
209  %r = fadd float %v1, %v2
210  ret float %r
211}
212
213; GCN-LABEL: {{^}}gs_main:
214; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
215; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
216
217; GFX10-FLATSCR: s_add_u32 s0, s0, s5
218; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
219; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
220; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
221
222; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
223; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
224; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
225; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
226; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
227; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
228; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
229; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
230; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
231; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
232
233; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
234; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
235; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
236; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
237; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
238; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
239; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
240; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
241; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
242
243; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
244; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
245; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
246
247; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0
248; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
249; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
250
251; FLATSCR-NOT: SCRATCH_RSRC_DWORD
252; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
253; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
254define amdgpu_gs float @gs_main(i32 %idx) {
255  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
256  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
257  %r = fadd float %v1, %v2
258  ret float %r
259}
260
261; Mesa GS and HS shaders have the preloaded scratch wave offset SGPR fixed at
262; SGPR5, and the inreg implementation is used to reference it in the IR. The
263; following tests confirm the shader and anything inserted after the return
264; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset.
265
266; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset:
267; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
268; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
269
270; GFX10-FLATSCR: s_add_u32 s0, s0, s5
271; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
272; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
273; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
274
275; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
276; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
277; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
278; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
279; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
280; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
281; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
282; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
283; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
284; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
285
286; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
287; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
288; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
289; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
290; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
291; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
292; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
293; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
294; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
295
296; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
297; FLATSCR-NOT: SCRATCH_RSRC_DWORD
298
299; SIVI-NOT: s_mov_b32 s6
300; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
301; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
302
303; GFX9_10-NOT: s_mov_b32 s5
304; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
305; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
306
307; MUBUF-DAG: s_mov_b32 s2, s5
308
309; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
310; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
311define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
312  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
313  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
314  %f = fadd float %v1, %v2
315  %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2
316  %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3
317  ret <{i32, i32, i32, float}> %r2
318}
319
320; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset:
321; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5
322; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
323
324; GFX10-FLATSCR: s_add_u32 s0, s0, s5
325; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
326; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
327; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
328
329; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1]
330; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8
331; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0
332; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0
333; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4
334; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0
335; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0)
336; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff
337; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5
338; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0
339
340; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1]
341; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8
342; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0
343; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0)
344; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff
345; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5
346; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0
347; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
348; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
349
350; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
351; FLATSCR-NOT: SCRATCH_RSRC_DWORD
352
353; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
354; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
355
356; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
357; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
358
359; MUBUF-DAG: s_mov_b32 s2, s5
360
361; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
362; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
363define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) {
364  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
365  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
366  %f = fadd float %v1, %v2
367  %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2
368  %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3
369  ret <{i32, i32, i32, float}> %r2
370}
371