; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

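; Check that the amdgpu-annotate-kernel-features pass adds the feature
; attribute implied by each intrinsic or addrspacecast a kernel uses, and
; rewrites the kernel's attribute group accordingly.
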
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

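; The x workgroup ID is available unconditionally, so using it adds no new
; attribute: the kernel keeps plain nounwind (#1).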
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

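; Using the y or z workgroup ID adds the matching amdgpu-work-group-id-*
; attribute; repeated and combined uses still fold into a single attribute set.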
; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

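; Workitem IDs behave the same way: x needs no attribute, while y and z add
; amdgpu-work-item-id-y and amdgpu-work-item-id-z respectively.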
; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

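; The dispatch, queue, and kernarg segment pointer intrinsics each imply a
; dedicated attribute.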
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %queue.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

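; Casting a group or private pointer to flat requires the queue pointer,
; from which the aperture bases are read.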
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

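; Casts in the opposite direction (flat to group or private) do not require
; the queue pointer.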
; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecasts (e.g. global or constant to flat) should not require the queue ptr.
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

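; #0 and #1 are the attribute groups in the input; the HSA lines below check
; the groups the pass is expected to produce.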
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }