1// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
2
3// Also test serialization of atomic operations here, to avoid duplicating the test.
4// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
5// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
6
7#ifndef ALREADY_INCLUDED
8#define ALREADY_INCLUDED
9
// Helper typedefs. Neither name is referenced anywhere else in the visible
// part of this file.
// NOTE(review): presumably carried over from a sibling atomic-ops test --
// confirm nothing depends on them before removing.
typedef __INTPTR_TYPE__ intptr_t;
typedef int int8 __attribute__((ext_vector_type(8)));
12
// Memory-order constants passed as the order argument(s) of the
// __opencl_atomic_* builtins exercised below. Each enumerator takes its value
// from the corresponding compiler-predefined __ATOMIC_* macro, so this test
// does not hard-code the numeric encoding here.
typedef enum memory_order {
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
20
// Memory-scope constants passed as the scope argument of the
// __opencl_atomic_* builtins exercised below. Each enumerator takes its value
// from the corresponding compiler-predefined __OPENCL_MEMORY_SCOPE_* macro.
typedef enum memory_scope {
  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
  // The sub-group scope is only declared when one of the sub-group extension
  // macros is defined. fi1() uses memory_scope_sub_group unconditionally, so
  // this file only compiles for targets that define one of them.
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
30
// File-scope atomic object; it is the target of __opencl_atomic_init in
// atomic_init_foo().
atomic_int j;
32
// Atomic loads with a constant seq_cst order, one per constant memory scope.
// The expectations pin the sync scope string emitted for each OpenCL scope:
//   work_group      -> syncscope("workgroup")
//   device          -> syncscope("agent")
//   all_svm_devices -> no syncscope clause
//   sub_group       -> syncscope("wavefront")
void fi1(atomic_int *i) {
  // CHECK-LABEL: @fi1
  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}
47
// Atomic store of the constant 1 through a pointer with no explicit address
// space qualifier, using seq_cst order and work-group scope. The expected IR
// is a `store atomic` on a plain i32* with syncscope("workgroup").
void fi2(atomic_int *i) {
  // CHECK-LABEL: @fi2
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}
53
// Atomic stores through pointers in explicitly qualified address spaces. The
// expectations pin the IR address space used for each qualifier:
//   global  -> addrspace(1)
//   private -> addrspace(5)
//   local   -> addrspace(3)
// All three stores use seq_cst order and work-group scope.
void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
  // CHECK-LABEL: @test_addr
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
}
65
// Read-modify-write builtins with seq_cst order and work-group scope. The
// expectations show that fetch_and becomes `atomicrmw and`, and that
// fetch_min / fetch_max select the signed (min/max) or unsigned (umin/umax)
// operation depending on whether the operand is atomic_int or atomic_uint.
void fi3(atomic_int *i, atomic_uint *ui) {
  // CHECK-LABEL: @fi3
  // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}
83
// Strong compare-exchange with acquire success and failure orders at
// work-group scope; returns the builtin's result. The expectations require a
// cmpxchg with syncscope("workgroup-one-as"), extraction of both fields of the
// { i32, i1 } result, a conditional branch on the i1 field, and a later store
// of the extracted i32 value.
// NOTE(review): STORE_EXPECTED / CONTINUE are only FileCheck capture names for
// the two branch targets; these directives do not establish which target
// performs the store.
bool fi4(atomic_int *i) {
  // CHECK-LABEL: @fi4(
  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire
  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
  // CHECK: store i32 [[OLD]]
  int cmp = 0;
  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}
94
// Atomic load with a constant seq_cst order but a scope only known at run
// time. The expectations require a switch on the scope value whose default
// target is the all-SVM-devices path and whose cases 1, 2 and 4 select the
// work-group, device and sub-group paths. Each path performs the load with
// its own sync scope and then branches to one shared continuation block.
void fi5(atomic_int *i, int scope) {
  // CHECK-LABEL: @fi5
  // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
  // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
  // CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
  // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[opencl_workgroup]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
  // CHECK: br label %[[continue:.*]]
  // CHECK: [[opencl_device]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
  // CHECK: br label %[[continue]]
  // CHECK: [[opencl_allsvmdevices]]:
  // CHECK: load atomic i32, i32* %{{.*}} seq_cst
  // CHECK: br label %[[continue]]
  // CHECK: [[opencl_subgroup]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
  // CHECK: br label %[[continue]]
  // CHECK: [[continue]]:
  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}
117
// Atomic load where both the memory order and the memory scope are only known
// at run time. The expectations require:
//   1. an outer switch on the order value: default -> monotonic path,
//      1 and 2 -> the same acquire path, 5 -> seq_cst path;
//   2. inside each order path, a switch on the scope value (default ->
//      all-SVM-devices, 1 -> work-group, 2 -> device, 4 -> sub-group);
//   3. one atomic load per (order, scope) pair. The monotonic and acquire
//      loads use the "-one-as" sync scope names, the seq_cst loads use the
//      plain names.
// The second order case refers back to the `acquire` capture instead of
// defining it again, so the test fails if order values 1 and 2 are ever
// routed to different blocks.
void fi6(atomic_int *i, int order, int scope) {
  // CHECK-LABEL: @fi6
  // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
  // CHECK-NEXT: i32 1, label %[[acquire:.*]]
  // CHECK-NEXT: i32 2, label %[[acquire]]
  // CHECK-NEXT: i32 5, label %[[seqcst:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[monotonic]]:
  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[acquire]]:
  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[seqcst]]:
  // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[MON_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic
  // CHECK: [[MON_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic
  // CHECK: [[MON_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} monotonic
  // CHECK: [[MON_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic
  // CHECK: [[ACQ_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire
  // CHECK: [[ACQ_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire
  // CHECK: [[ACQ_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} acquire
  // CHECK: [[ACQ_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire
  // CHECK: [[SEQ_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
  // CHECK: [[SEQ_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
  // CHECK: [[SEQ_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} seq_cst
  // CHECK: [[SEQ_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
  int x = __opencl_atomic_load(i, order, scope);
}
169
// Relaxed atomic load of an atomic_float in the global address space at
// work-group scope. The expectation shows the load is emitted as a monotonic
// i32 atomic load from addrspace(1) with syncscope("workgroup-one-as").
float ff1(global atomic_float *d) {
  // CHECK-LABEL: @ff1
  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
175
// Release atomic store to an atomic_float at work-group scope. The
// expectation shows the store is emitted as an i32 atomic store with
// syncscope("workgroup-one-as").
void ff2(atomic_float *d) {
  // CHECK-LABEL: @ff2
  // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release
  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
181
// Sequentially consistent atomic exchange on an atomic_float at work-group
// scope; returns the builtin's result. The expectation shows the exchange is
// emitted as an i32 `atomicrmw xchg` with syncscope("workgroup").
float ff3(atomic_float *d) {
  // CHECK-LABEL: @ff3
  // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst
  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
187
// Initialises the file-scope atomic `j` with __opencl_atomic_init. The
// expectations require a store in the function body and forbid the text
// "atomic" both before that store and between it and the closing brace.
// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
  // CHECK-NOT: atomic
  // CHECK: store
  __opencl_atomic_init(&j, 42);

  // CHECK-NOT: atomic
  // CHECK: }
}
198
// Compare-exchange with constant but different success and failure orders at
// work-group scope. The expectations check that both orders appear on the
// cmpxchg instruction (relaxed is printed as `monotonic`), that the weak
// builtin produces `cmpxchg weak`, and that the acquire-success form uses
// syncscope("workgroup-one-as") while the seq_cst-success form uses
// syncscope("workgroup").
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);

  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}
207
// Strong compare-exchange where both the success and the failure order are
// only known at run time (the scope is the constant work-group scope). The
// expectations require:
//   1. an outer switch on the success order: default -> monotonic,
//      1 and 2 -> the same acquire path, 3 -> release, 4 -> acq_rel,
//      5 -> seq_cst;
//   2. inside each success path, a switch on the failure order. The monotonic
//      and release paths have no cases (failure is always monotonic); the
//      acquire and acq_rel paths route 1 and 2 to one acquire-failure block;
//      the seq_cst path additionally routes 5 to a seq_cst-failure block;
//   3. one cmpxchg per reachable (success, failure) pair, each followed by a
//      branch.
// Repeated case targets refer back to the capture made on the previous line
// rather than defining it again, so the test fails if the two order values
// stop sharing a block. The outer switch is also closed with an explicit `]`
// expectation, like every other switch in this file.
// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[MONOTONIC]]
  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: ]

  // CHECK: [[ACQUIRE]]
  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE]]
  // CHECK-NEXT: ]

  // CHECK: [[RELEASE]]
  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: ]

  // CHECK: [[ACQREL]]
  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE]]
  // CHECK-NEXT: ]

  // CHECK: [[SEQCST]]
  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[MONOTONIC_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} monotonic monotonic
  // CHECK: br

  // CHECK: [[ACQUIRE_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acquire monotonic
  // CHECK: br

  // CHECK: [[ACQUIRE_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acquire acquire
  // CHECK: br

  // CHECK: [[ACQREL_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acq_rel monotonic
  // CHECK: br

  // CHECK: [[ACQREL_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acq_rel acquire
  // CHECK: br

  // CHECK: [[SEQCST_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} seq_cst monotonic
  // CHECK: br

  // CHECK: [[SEQCST_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} seq_cst acquire
  // CHECK: br

  // CHECK: [[SEQCST_SEQCST]]
  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
  // CHECK: br
}
277
// Sequentially consistent atomic load through a volatile-qualified pointer at
// work-group scope; returns the loaded value. The expectations match the
// function body line by line: two i32 allocas, a store of the parameter to
// its stack slot, a reload of that pointer, a `load atomic volatile` with
// syncscope("workgroup"), a store of the result to the temporary, a reload
// of the temporary, and the return. The stack slots are accessed through
// addrspace(5) pointers.
int test_volatile(volatile atomic_int *i) {
  // CHECK-LABEL: @test_volatile
  // CHECK:      %[[i_addr:.*]] = alloca i32
  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
  // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
  // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst
  // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
  // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
  // CHECK-NEXT: ret i32 %[[retval]]
  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}
290
291#endif
292