1; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5; Legacy intrinsics that just read implicit parameters
6
7; FUNC-LABEL: {{^}}workdim_legacy:
8; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
9; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
10; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
11; GCN-NOHSA: buffer_store_dword [[VVAL]]
12
13; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
14; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z
; Test function: calls the legacy llvm.AMDGPU.read.workdim intrinsic and
; writes the returned i32 through the addrspace(1) pointer %out.
define void @workdim_legacy(i32 addrspace(1)* %out) {
entry:
  %workdim = call i32 @llvm.AMDGPU.read.workdim() #0
  store i32 %workdim, i32 addrspace(1)* %out
  ret void
}
21
22; FUNC-LABEL: {{^}}ngroups_x:
23; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
24; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
25; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
26; GCN-NOHSA: buffer_store_dword [[VVAL]]
27
28; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
29; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
; Test function: calls llvm.r600.read.ngroups.x and writes the returned i32
; through the addrspace(1) pointer %out.
define void @ngroups_x(i32 addrspace(1)* %out) {
entry:
  %ngroups = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
36
37; FUNC-LABEL: {{^}}ngroups_y:
38; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
39; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
40; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
41; GCN-NOHSA: buffer_store_dword [[VVAL]]
42
43; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
44; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
; Test function: calls llvm.r600.read.ngroups.y and writes the returned i32
; through the addrspace(1) pointer %out.
define void @ngroups_y(i32 addrspace(1)* %out) {
entry:
  %ngroups = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
51
52; FUNC-LABEL: {{^}}ngroups_z:
53; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
54; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
55; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
56; GCN-NOHSA: buffer_store_dword [[VVAL]]
57
58; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
59; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
; Test function: calls llvm.r600.read.ngroups.z and writes the returned i32
; through the addrspace(1) pointer %out.
define void @ngroups_z(i32 addrspace(1)* %out) {
entry:
  %ngroups = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %ngroups, i32 addrspace(1)* %out
  ret void
}
66
67; FUNC-LABEL: {{^}}global_size_x:
68; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
69; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
70; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
71; GCN-NOHSA: buffer_store_dword [[VVAL]]
72
73; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
74; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
; Test function: calls llvm.r600.read.global.size.x and writes the returned
; i32 through the addrspace(1) pointer %out.
define void @global_size_x(i32 addrspace(1)* %out) {
entry:
  %gsize = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
81
82; FUNC-LABEL: {{^}}global_size_y:
83; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
84; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
85; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
86; GCN-NOHSA: buffer_store_dword [[VVAL]]
87
88; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
89; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
; Test function: calls llvm.r600.read.global.size.y and writes the returned
; i32 through the addrspace(1) pointer %out.
define void @global_size_y(i32 addrspace(1)* %out) {
entry:
  %gsize = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
96
97; FUNC-LABEL: {{^}}global_size_z:
98; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
99; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
100; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
101; GCN-NOHSA: buffer_store_dword [[VVAL]]
102
103; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
104; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
; Test function: calls llvm.r600.read.global.size.z and writes the returned
; i32 through the addrspace(1) pointer %out.
define void @global_size_z(i32 addrspace(1)* %out) {
entry:
  %gsize = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %gsize, i32 addrspace(1)* %out
  ret void
}
111
112; FUNC-LABEL: {{^}}local_size_x:
113; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
114; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
115; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
116; GCN-NOHSA: buffer_store_dword [[VVAL]]
117
118; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
119; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
; Test function: calls llvm.r600.read.local.size.x and writes the returned
; i32 through the addrspace(1) pointer %out.
define void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %lsize = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
126
127; FUNC-LABEL: {{^}}local_size_y:
128; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
129; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
130; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
131; GCN-NOHSA: buffer_store_dword [[VVAL]]
132
133; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
134; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
; Test function: calls llvm.r600.read.local.size.y and writes the returned
; i32 through the addrspace(1) pointer %out.
define void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %lsize = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
141
142; FUNC-LABEL: {{^}}local_size_z:
143; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
144; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
145; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
146; GCN-NOHSA: buffer_store_dword [[VVAL]]
147
148; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
149; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
; Test function: calls llvm.r600.read.local.size.z and writes the returned
; i32 through the addrspace(1) pointer %out.
define void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %lsize = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %lsize, i32 addrspace(1)* %out
  ret void
}
156
157; Legacy use of r600 intrinsics by GCN
158
159; The tgid values are stored in sgprs offset by the number of user
160; sgprs.
161
162; FUNC-LABEL: {{^}}tgid_x_legacy:
163; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
164; GCN-NOHSA: buffer_store_dword [[VVAL]]
165
166; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
167; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
168; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
169; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
170; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; Test function: calls llvm.r600.read.tgid.x and writes the returned i32
; through the addrspace(1) pointer %out.
define void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
  %tgid = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
177
; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2

; Test function: calls llvm.r600.read.tgid.y and writes the returned i32
; through the addrspace(1) pointer %out.
; The register check above ends with an end-of-line anchor, as the x and z
; variants do, so that the s3 operand cannot also match s30 through s39.
define void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
  %tgid = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
189
190; FUNC-LABEL: {{^}}tgid_z_legacy:
191; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
192; GCN-NOHSA: buffer_store_dword [[VVAL]]
193
194; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
195; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
196; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
197; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
198; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; Test function: calls llvm.r600.read.tgid.z and writes the returned i32
; through the addrspace(1) pointer %out.
define void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
  %tgid = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %tgid, i32 addrspace(1)* %out
  ret void
}
205
206; GCN-NOHSA: .section .AMDGPU.config
207; GCN-NOHSA: .long 47180
208; GCN-NOHSA-NEXT: .long 132{{$}}
209
210; FUNC-LABEL: {{^}}tidig_x_legacy:
211; GCN-NOHSA: buffer_store_dword v0
; Test function: calls llvm.r600.read.tidig.x and writes the returned i32
; through the addrspace(1) pointer %out.
define void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}
218
219; GCN-NOHSA: .section .AMDGPU.config
220; GCN-NOHSA: .long 47180
221; GCN-NOHSA-NEXT: .long 2180{{$}}
222
223; FUNC-LABEL: {{^}}tidig_y_legacy:
224
225; GCN-NOHSA: buffer_store_dword v1
; Test function: calls llvm.r600.read.tidig.y and writes the returned i32
; through the addrspace(1) pointer %out.
define void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}
232
233; GCN-NOHSA: .section .AMDGPU.config
234; GCN-NOHSA: .long 47180
235; GCN-NOHSA-NEXT: .long 4228{{$}}
236
237; FUNC-LABEL: {{^}}tidig_z_legacy:
238; GCN-NOHSA: buffer_store_dword v2
; Test function: calls llvm.r600.read.tidig.z and writes the returned i32
; through the addrspace(1) pointer %out.
define void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}
245
; Declarations for every intrinsic called by the test functions in this file.
; All of them take no arguments, return i32, and use attribute group #0.

; Legacy AMDGPU-prefixed intrinsic (called by @workdim_legacy).
declare i32 @llvm.AMDGPU.read.workdim() #0

; r600 intrinsics, one per axis, grouped by the quantity they read.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

; Attribute group shared by all declarations and call sites above.
attributes #0 = { readnone }
269