1; RUN: opt -S %s -atomic-expand | FileCheck %s
2
3;;; NOTE: this test is actually target-independent -- any target which
4;;; doesn't support inline atomics can be used. (E.g. X86 i386 would
5;;; work, if LLVM is properly taught about what it's missing vs i586.)
6
7;target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
8;target triple = "i386-unknown-unknown"
9target datalayout = "e-m:e-p:32:32-i64:64-f128:64-n32-S64"
10target triple = "sparc-unknown-unknown"
11
12;; First, check the sized calls. Except for cmpxchg, these are fairly
13;; straightforward.
14
; CHECK-LABEL: @test_load_i16(
; CHECK:  %1 = bitcast i16* %arg to i8*
; CHECK:  %2 = call i16 @__atomic_load_2(i8* %1, i32 5)
; CHECK:  ret i16 %2
;; Aligned i16 load: expands to the sized libcall __atomic_load_2
;; (the trailing i32 5 argument encodes the seq_cst memory order).
define i16 @test_load_i16(i16* %arg) {
  %ret = load atomic i16, i16* %arg seq_cst, align 4
  ret i16 %ret
}
23
; CHECK-LABEL: @test_store_i16(
; CHECK:  %1 = bitcast i16* %arg to i8*
; CHECK:  call void @__atomic_store_2(i8* %1, i16 %val, i32 5)
; CHECK:  ret void
;; Aligned i16 store: expands to the sized libcall __atomic_store_2,
;; passing the value directly (no stack temporary needed).
define void @test_store_i16(i16* %arg, i16 %val) {
  store atomic i16 %val, i16* %arg seq_cst, align 4
  ret void
}
32
; CHECK-LABEL: @test_exchange_i16(
; CHECK:  %1 = bitcast i16* %arg to i8*
; CHECK:  %2 = call i16 @__atomic_exchange_2(i8* %1, i16 %val, i32 5)
; CHECK:  ret i16 %2
;; atomicrmw xchg on a supported size maps directly onto the sized
;; __atomic_exchange_2 libcall.
define i16 @test_exchange_i16(i16* %arg, i16 %val) {
  %ret = atomicrmw xchg i16* %arg, i16 %val seq_cst
  ret i16 %ret
}
41
; CHECK-LABEL: @test_cmpxchg_i16(
; CHECK:  %1 = bitcast i16* %arg to i8*
; CHECK:  %2 = alloca i16, align 2
; CHECK:  %3 = bitcast i16* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 2, i8* %3)
; CHECK:  store i16 %old, i16* %2, align 2
; CHECK:  %4 = call zeroext i1 @__atomic_compare_exchange_2(i8* %1, i8* %3, i16 %new, i32 5, i32 0)
; CHECK:  %5 = load i16, i16* %2, align 2
; CHECK:  call void @llvm.lifetime.end(i64 2, i8* %3)
; CHECK:  %6 = insertvalue { i16, i1 } undef, i16 %5, 0
; CHECK:  %7 = insertvalue { i16, i1 } %6, i1 %4, 1
; CHECK:  %ret = extractvalue { i16, i1 } %7, 0
; CHECK:  ret i16 %ret
;; cmpxchg is the one sized call needing a stack temporary: the expected
;; value is spilled to an alloca (with lifetime markers) so the libcall
;; can write the observed value back through it. The two trailing i32
;; arguments are the orderings: 5 = seq_cst success, 0 = monotonic failure.
define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) {
  %ret_succ = cmpxchg i16* %arg, i16 %old, i16 %new seq_cst monotonic
  %ret = extractvalue { i16, i1 } %ret_succ, 0
  ret i16 %ret
}
60
; CHECK-LABEL: @test_add_i16(
; CHECK:  %1 = bitcast i16* %arg to i8*
; CHECK:  %2 = call i16 @__atomic_fetch_add_2(i8* %1, i16 %val, i32 5)
; CHECK:  ret i16 %2
;; atomicrmw add on a supported size maps onto the sized
;; __atomic_fetch_add_2 libcall (returns the old value, as atomicrmw does).
define i16 @test_add_i16(i16* %arg, i16 %val) {
  %ret = atomicrmw add i16* %arg, i16 %val seq_cst
  ret i16 %ret
}
69
70
;; Now, check the output for the unsized (generic) libcalls. i128 is used
;; for these tests because the "_16"-suffixed sized functions aren't
;; available on 32-bit targets (e.g. i386, or the sparc triple used here).
74
; CHECK-LABEL: @test_load_i128(
; CHECK:  %1 = bitcast i128* %arg to i8*
; CHECK:  %2 = alloca i128, align 8
; CHECK:  %3 = bitcast i128* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK:  call void @__atomic_load(i32 16, i8* %1, i8* %3, i32 5)
; CHECK:  %4 = load i128, i128* %2, align 8
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK:  ret i128 %4
;; Oversized (i128) load: there is no sized libcall, so the result is
;; returned through a temporary alloca passed to the generic __atomic_load,
;; whose first argument (i32 16) is the byte size.
define i128 @test_load_i128(i128* %arg) {
  %ret = load atomic i128, i128* %arg seq_cst, align 16
  ret i128 %ret
}
88
;; Oversized (i128) store: the value is spilled to a temporary alloca and
;; handed to the generic __atomic_store libcall (first arg = byte size 16).
;; NOTE: the CHECK-LABEL below previously lacked its colon, so FileCheck
;; silently ignored it and the following CHECKs were unanchored.
; CHECK-LABEL: @test_store_i128(
; CHECK:  %1 = bitcast i128* %arg to i8*
; CHECK:  %2 = alloca i128, align 8
; CHECK:  %3 = bitcast i128* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK:  store i128 %val, i128* %2, align 8
; CHECK:  call void @__atomic_store(i32 16, i8* %1, i8* %3, i32 5)
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK:  ret void
define void @test_store_i128(i128* %arg, i128 %val) {
  store atomic i128 %val, i128* %arg seq_cst, align 16
  ret void
}
102
; CHECK-LABEL: @test_exchange_i128(
; CHECK:  %1 = bitcast i128* %arg to i8*
; CHECK:  %2 = alloca i128, align 8
; CHECK:  %3 = bitcast i128* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK:  store i128 %val, i128* %2, align 8
; CHECK:  %4 = alloca i128, align 8
; CHECK:  %5 = bitcast i128* %4 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %5)
; CHECK:  call void @__atomic_exchange(i32 16, i8* %1, i8* %3, i8* %5, i32 5)
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK:  %6 = load i128, i128* %4, align 8
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %5)
; CHECK:  ret i128 %6
;; Oversized xchg: two temporaries are needed -- one carrying the new
;; value in, one receiving the old value out of the generic
;; __atomic_exchange libcall.
define i128 @test_exchange_i128(i128* %arg, i128 %val) {
  %ret = atomicrmw xchg i128* %arg, i128 %val seq_cst
  ret i128 %ret
}
121
; CHECK-LABEL: @test_cmpxchg_i128(
; CHECK:  %1 = bitcast i128* %arg to i8*
; CHECK:  %2 = alloca i128, align 8
; CHECK:  %3 = bitcast i128* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK:  store i128 %old, i128* %2, align 8
; CHECK:  %4 = alloca i128, align 8
; CHECK:  %5 = bitcast i128* %4 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %5)
; CHECK:  store i128 %new, i128* %4, align 8
; CHECK:  %6 = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %1, i8* %3, i8* %5, i32 5, i32 0)
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %5)
; CHECK:  %7 = load i128, i128* %2, align 8
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK:  %8 = insertvalue { i128, i1 } undef, i128 %7, 0
; CHECK:  %9 = insertvalue { i128, i1 } %8, i1 %6, 1
; CHECK:  %ret = extractvalue { i128, i1 } %9, 0
; CHECK:  ret i128 %ret
;; Oversized cmpxchg: both expected (%old, written back with the observed
;; value) and desired (%new) values go through temporary allocas into the
;; generic __atomic_compare_exchange; orderings 5/0 = seq_cst/monotonic.
define i128 @test_cmpxchg_i128(i128* %arg, i128 %old, i128 %new) {
  %ret_succ = cmpxchg i128* %arg, i128 %old, i128 %new seq_cst monotonic
  %ret = extractvalue { i128, i1 } %ret_succ, 0
  ret i128 %ret
}
145
146; This one is a verbose expansion, as there is no generic
147; __atomic_fetch_add function, so it needs to expand to a cmpxchg
148; loop, which then itself expands into a libcall.
149
; CHECK-LABEL: @test_add_i128(
; CHECK:  %1 = alloca i128, align 8
; CHECK:  %2 = alloca i128, align 8
; CHECK:  %3 = load i128, i128* %arg, align 16
; CHECK:  br label %atomicrmw.start
; CHECK:atomicrmw.start:
; CHECK:  %loaded = phi i128 [ %3, %0 ], [ %newloaded, %atomicrmw.start ]
; CHECK:  %new = add i128 %loaded, %val
; CHECK:  %4 = bitcast i128* %arg to i8*
; CHECK:  %5 = bitcast i128* %1 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %5)
; CHECK:  store i128 %loaded, i128* %1, align 8
; CHECK:  %6 = bitcast i128* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %6)
; CHECK:  store i128 %new, i128* %2, align 8
; CHECK:  %7 = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %4, i8* %5, i8* %6, i32 5, i32 5)
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %6)
; CHECK:  %8 = load i128, i128* %1, align 8
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %5)
; CHECK:  %9 = insertvalue { i128, i1 } undef, i128 %8, 0
; CHECK:  %10 = insertvalue { i128, i1 } %9, i1 %7, 1
; CHECK:  %success = extractvalue { i128, i1 } %10, 1
; CHECK:  %newloaded = extractvalue { i128, i1 } %10, 0
; CHECK:  br i1 %success, label %atomicrmw.end, label %atomicrmw.start
; CHECK:atomicrmw.end:
; CHECK:  ret i128 %newloaded
;; atomicrmw add on i128: there is no generic __atomic_fetch_add, so the
;; pass emits a load + compare-exchange retry loop, and the cmpxchg inside
;; that loop is itself expanded into a __atomic_compare_exchange libcall
;; (both orderings seq_cst, i32 5/5).
define i128 @test_add_i128(i128* %arg, i128 %val) {
  %ret = atomicrmw add i128* %arg, i128 %val seq_cst
  ret i128 %ret
}
180
181;; Ensure that non-integer types get bitcast correctly on the way in and out of a libcall:
182
; CHECK-LABEL: @test_load_double(
; CHECK:  %1 = bitcast double* %arg to i8*
; CHECK:  %2 = call i64 @__atomic_load_8(i8* %1, i32 5)
; CHECK:  %3 = bitcast i64 %2 to double
; CHECK:  ret double %3
;; Non-integer type of a libcall-supported size: the sized integer call
;; __atomic_load_8 is used and its i64 result is bitcast back to double.
;; (The %val parameter is unused; only %arg matters here.)
define double @test_load_double(double* %arg, double %val) {
  %1 = load atomic double, double* %arg seq_cst, align 16
  ret double %1
}
192
; CHECK-LABEL: @test_store_double(
; CHECK:  %1 = bitcast double* %arg to i8*
; CHECK:  %2 = bitcast double %val to i64
; CHECK:  call void @__atomic_store_8(i8* %1, i64 %2, i32 5)
; CHECK:  ret void
;; Non-integer store of a supported size: the double value is bitcast to
;; i64 so the sized __atomic_store_8 libcall can be used.
define void @test_store_double(double* %arg, double %val) {
  store atomic double %val, double* %arg seq_cst, align 16
  ret void
}
202
; CHECK-LABEL: @test_cmpxchg_ptr(
; CHECK:   %1 = bitcast i16** %arg to i8*
; CHECK:   %2 = alloca i16*, align 4
; CHECK:   %3 = bitcast i16** %2 to i8*
; CHECK:   call void @llvm.lifetime.start(i64 4, i8* %3)
; CHECK:   store i16* %old, i16** %2, align 4
; CHECK:   %4 = ptrtoint i16* %new to i32
; CHECK:   %5 = call zeroext i1 @__atomic_compare_exchange_4(i8* %1, i8* %3, i32 %4, i32 5, i32 2)
; CHECK:   %6 = load i16*, i16** %2, align 4
; CHECK:   call void @llvm.lifetime.end(i64 4, i8* %3)
; CHECK:   %7 = insertvalue { i16*, i1 } undef, i16* %6, 0
; CHECK:   %8 = insertvalue { i16*, i1 } %7, i1 %5, 1
; CHECK:   %ret = extractvalue { i16*, i1 } %8, 0
; CHECK:   ret i16* %ret
; CHECK: }
;; Pointer cmpxchg (4-byte pointers on this 32-bit target): the expected
;; value goes via a stack slot as usual, but the desired value is converted
;; with ptrtoint and passed by value to the sized call. Orderings are
;; i32 5 / i32 2 == seq_cst success / acquire failure.
define i16* @test_cmpxchg_ptr(i16** %arg, i16* %old, i16* %new) {
  %ret_succ = cmpxchg i16** %arg, i16* %old, i16* %new seq_cst acquire
  %ret = extractvalue { i16*, i1 } %ret_succ, 0
  ret i16* %ret
}
223
224;; ...and for a non-integer type of large size too.
225
;; Large non-integer (fp128) store: too big for a sized call, so the value
;; is spilled to a temporary alloca and passed to the generic
;; __atomic_store libcall, just like the i128 case.
;; NOTE: the label now ends with "(" so it cannot accidentally match a
;; longer symbol name; CHECK indentation normalized to two spaces.
; CHECK-LABEL: @test_store_fp128(
; CHECK:  %1 = bitcast fp128* %arg to i8*
; CHECK:  %2 = alloca fp128, align 8
; CHECK:  %3 = bitcast fp128* %2 to i8*
; CHECK:  call void @llvm.lifetime.start(i64 16, i8* %3)
; CHECK:  store fp128 %val, fp128* %2, align 8
; CHECK:  call void @__atomic_store(i32 16, i8* %1, i8* %3, i32 5)
; CHECK:  call void @llvm.lifetime.end(i64 16, i8* %3)
; CHECK:  ret void
define void @test_store_fp128(fp128* %arg, fp128 %val) {
  store atomic fp128 %val, fp128* %arg seq_cst, align 16
  ret void
}
239
240;; Unaligned loads and stores should be expanded to the generic
241;; libcall, just like large loads/stores, and not a specialized one.
242;; NOTE: atomicrmw and cmpxchg don't yet support an align attribute;
243;; when such support is added, they should also be tested here.
244
; CHECK-LABEL: @test_unaligned_load_i16(
; CHECK:  __atomic_load(
;; align 1 < natural alignment, so even a small i16 load must use the
;; generic (unsized) __atomic_load, not __atomic_load_2.
define i16 @test_unaligned_load_i16(i16* %arg) {
  %ret = load atomic i16, i16* %arg seq_cst, align 1
  ret i16 %ret
}
251
; CHECK-LABEL: @test_unaligned_store_i16(
; CHECK: __atomic_store(
;; Unaligned store likewise falls back to the generic __atomic_store.
define void @test_unaligned_store_i16(i16* %arg, i16 %val) {
  store atomic i16 %val, i16* %arg seq_cst, align 1
  ret void
}
258