1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5; Legacy intrinsics that just read implicit parameters
6
7; FUNC-LABEL: {{^}}ngroups_x:
8; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
9; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
10; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
11; GCN-NOHSA: buffer_store_dword [[VVAL]]
12
13; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
14; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
; Kernel that stores the result of llvm.r600.read.ngroups.x() through %out.
define amdgpu_kernel void @ngroups_x(i32 addrspace(1)* %out) {
  %ngroups = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
21
22; FUNC-LABEL: {{^}}ngroups_y:
23; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
24; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
25; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
26; GCN-NOHSA: buffer_store_dword [[VVAL]]
27
28; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
29; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
; Kernel that stores the result of llvm.r600.read.ngroups.y() through %out.
define amdgpu_kernel void @ngroups_y(i32 addrspace(1)* %out) {
  %ngroups = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
36
37; FUNC-LABEL: {{^}}ngroups_z:
38; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
39; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
40; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
41; GCN-NOHSA: buffer_store_dword [[VVAL]]
42
43; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
44; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
; Kernel that stores the result of llvm.r600.read.ngroups.z() through %out.
define amdgpu_kernel void @ngroups_z(i32 addrspace(1)* %out) {
  %ngroups = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
51
52; FUNC-LABEL: {{^}}global_size_x:
53; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
54; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
55; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
56; GCN-NOHSA: buffer_store_dword [[VVAL]]
57
58; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
59; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
; Kernel that stores the result of llvm.r600.read.global.size.x() through %out.
define amdgpu_kernel void @global_size_x(i32 addrspace(1)* %out) {
  %gsize = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
66
67; FUNC-LABEL: {{^}}global_size_y:
68; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
69; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
70; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
71; GCN-NOHSA: buffer_store_dword [[VVAL]]
72
73; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
74; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
; Kernel that stores the result of llvm.r600.read.global.size.y() through %out.
define amdgpu_kernel void @global_size_y(i32 addrspace(1)* %out) {
  %gsize = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
81
82; FUNC-LABEL: {{^}}global_size_z:
83; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
84; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
85; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
86; GCN-NOHSA: buffer_store_dword [[VVAL]]
87
88; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
89; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
; Kernel that stores the result of llvm.r600.read.global.size.z() through %out.
define amdgpu_kernel void @global_size_z(i32 addrspace(1)* %out) {
  %gsize = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
96
97; FUNC-LABEL: {{^}}local_size_x:
98; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
99; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
100; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
101; GCN-NOHSA: buffer_store_dword [[VVAL]]
102
103; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
104; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
; Kernel that stores the result of llvm.r600.read.local.size.x() through %out.
define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
  %lsize = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
111
112; FUNC-LABEL: {{^}}local_size_y:
113; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
114; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
115; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
116; GCN-NOHSA: buffer_store_dword [[VVAL]]
117
118; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
119; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
; Kernel that stores the result of llvm.r600.read.local.size.y() through %out.
define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
  %lsize = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
126
127; FUNC-LABEL: {{^}}local_size_z:
128; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
129; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
130; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
131; GCN-NOHSA: buffer_store_dword [[VVAL]]
132
133; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
134; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
; Kernel that stores the result of llvm.r600.read.local.size.z() through %out.
define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
  %lsize = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
141
142; Legacy use of r600 intrinsics by GCN
143
144; The tgid values are stored in sgprs offset by the number of user
145; sgprs.
146
147; FUNC-LABEL: {{^}}tgid_x_legacy:
148; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
149; GCN-NOHSA: buffer_store_dword [[VVAL]]
150
151; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
152; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
153; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
154; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
155; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; Kernel that stores the result of llvm.r600.read.tgid.x() through %out.
define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
  %tgid = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
162
; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; Kernel that stores the result of llvm.r600.read.tgid.y() through %out.
; The source register in the v_mov pattern above is anchored at end of line,
; matching the tgid_x_legacy and tgid_z_legacy patterns, so that a register
; such as s30 can no longer satisfy a pattern meant to match only s3.
define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}
174
175; FUNC-LABEL: {{^}}tgid_z_legacy:
176; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
177; GCN-NOHSA: buffer_store_dword [[VVAL]]
178
179; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
180; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
181; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
182; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
183; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; Kernel that stores the result of llvm.r600.read.tgid.z() through %out.
define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
  %tgid = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
190
191; GCN-NOHSA: .section .AMDGPU.config
192; GCN-NOHSA: .long 47180
193; GCN-NOHSA-NEXT: .long 132{{$}}
194
195; FUNC-LABEL: {{^}}tidig_x_legacy:
196; GCN-NOHSA: buffer_store_dword v0
; Kernel that stores the result of llvm.r600.read.tidig.x() through %out.
define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}
203
204; GCN-NOHSA: .section .AMDGPU.config
205; GCN-NOHSA: .long 47180
206; GCN-NOHSA-NEXT: .long 2180{{$}}
207
208; FUNC-LABEL: {{^}}tidig_y_legacy:
209
210; GCN-NOHSA: buffer_store_dword v1
; Kernel that stores the result of llvm.r600.read.tidig.y() through %out.
define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
  %tid = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}
217
218; GCN-NOHSA: .section .AMDGPU.config
219; GCN-NOHSA: .long 47180
220; GCN-NOHSA-NEXT: .long 4228{{$}}
221
222; FUNC-LABEL: {{^}}tidig_z_legacy:
223; GCN-NOHSA: buffer_store_dword v2
; Kernel that stores the result of llvm.r600.read.tidig.z() through %out.
define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
  %tid = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}
230
231declare i32 @llvm.r600.read.ngroups.x() #0
232declare i32 @llvm.r600.read.ngroups.y() #0
233declare i32 @llvm.r600.read.ngroups.z() #0
234
235declare i32 @llvm.r600.read.global.size.x() #0
236declare i32 @llvm.r600.read.global.size.y() #0
237declare i32 @llvm.r600.read.global.size.z() #0
238
239declare i32 @llvm.r600.read.local.size.x() #0
240declare i32 @llvm.r600.read.local.size.y() #0
241declare i32 @llvm.r600.read.local.size.z() #0
242
243declare i32 @llvm.r600.read.tgid.x() #0
244declare i32 @llvm.r600.read.tgid.y() #0
245declare i32 @llvm.r600.read.tgid.z() #0
246
247declare i32 @llvm.r600.read.tidig.x() #0
248declare i32 @llvm.r600.read.tidig.y() #0
249declare i32 @llvm.r600.read.tidig.z() #0
250
251attributes #0 = { readnone }
252