1; RUN: opt -S < %s -instcombine | FileCheck %s
2
; Target configuration for every function in this file: x86-64 macOS with
; 64-bit pointers (p:64:64:64).  The pr27490 tests below expect pointer-typed
; accesses to be rewritten as i64 accesses.
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
4target triple = "x86_64-apple-macosx10.7.0"
5
6; Check transforms involving atomic operations
7
; A seq_cst atomic load followed by a plain load of the same pointer, with
; the two results added.  The expected output keeps the atomic load and
; computes the sum as `shl i32 %x, 1`, i.e. both addends are %x.
8define i32 @test1(i32* %p) {
9; CHECK-LABEL: define i32 @test1(
10; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
11; CHECK: shl i32 %x, 1
12  %x = load atomic i32, i32* %p seq_cst, align 4
13  %y = load i32, i32* %p, align 4
14  %z = add i32 %x, %y
15  ret i32 %z
16}
17
; Two volatile loads of the same pointer.  Both loads are expected to be
; present in the output.
18define i32 @test2(i32* %p) {
19; CHECK-LABEL: define i32 @test2(
20; CHECK: %x = load volatile i32, i32* %p, align 4
21; CHECK: %y = load volatile i32, i32* %p, align 4
22  %x = load volatile i32, i32* %p, align 4
23  %y = load volatile i32, i32* %p, align 4
24  %z = add i32 %x, %y
25  ret i32 %z
26}
27
28; The exact semantics of mixing volatile and non-volatile on the same
29; memory location are a bit unclear, but conservatively, we know we don't
30; want to remove the volatile.
; Only the volatile load is pinned by the check lines; what happens to the
; plain load of %p is not checked here.
31define i32 @test3(i32* %p) {
32; CHECK-LABEL: define i32 @test3(
33; CHECK: %x = load volatile i32, i32* %p, align 4
34  %x = load volatile i32, i32* %p, align 4
35  %y = load i32, i32* %p, align 4
36  %z = add i32 %x, %y
37  ret i32 %z
38}
39
40; Forwarding from a stronger ordered atomic is fine
; Here a seq_cst load is followed by an unordered load of the same pointer.
; The expected output keeps the seq_cst load and computes the sum as
; `shl i32 %x, 1`.
41define i32 @test4(i32* %p) {
42; CHECK-LABEL: define i32 @test4(
43; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
44; CHECK: shl i32 %x, 1
45  %x = load atomic i32, i32* %p seq_cst, align 4
46  %y = load atomic i32, i32* %p unordered, align 4
47  %z = add i32 %x, %y
48  ret i32 %z
49}
50
51; Forwarding from a non-atomic is not.  (The earlier load
52; could in principle be promoted to atomic and then forwarded,
53; but we can't just drop the atomic from the load.)
; NOTE(review): in the IR below the *earlier* load (%x) is the unordered
; atomic one and the later load (%y) is the plain one, which does not match
; the scenario described above -- confirm which ordering was intended.
; The check lines only pin the unordered atomic load of %x.
54define i32 @test5(i32* %p) {
55; CHECK-LABEL: define i32 @test5(
56; CHECK: %x = load atomic i32, i32* %p unordered, align 4
57  %x = load atomic i32, i32* %p unordered, align 4
58  %y = load i32, i32* %p, align 4
59  %z = add i32 %x, %y
60  ret i32 %z
61}
62
63; Forwarding atomic to atomic is fine
; Two unordered atomic loads of the same pointer.  The expected output keeps
; the first load and computes the sum as `shl i32 %x, 1`.
64define i32 @test6(i32* %p) {
65; CHECK-LABEL: define i32 @test6(
66; CHECK: %x = load atomic i32, i32* %p unordered, align 4
67; CHECK: shl i32 %x, 1
68  %x = load atomic i32, i32* %p unordered, align 4
69  %y = load atomic i32, i32* %p unordered, align 4
70  %z = add i32 %x, %y
71  ret i32 %z
72}
73
74; FIXME: we currently don't do anything for monotonic
; A seq_cst load followed by a monotonic load of the same pointer.  Both
; loads are expected to be present in the output.
75define i32 @test7(i32* %p) {
76; CHECK-LABEL: define i32 @test7(
77; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
78; CHECK: %y = load atomic i32, i32* %p monotonic, align 4
79  %x = load atomic i32, i32* %p seq_cst, align 4
80  %y = load atomic i32, i32* %p monotonic, align 4
81  %z = add i32 %x, %y
82  ret i32 %z
83}
84
85; FIXME: We could forward in racy code
; A seq_cst load followed by an acquire load of the same pointer.  Both
; loads are expected to be present in the output.
86define i32 @test8(i32* %p) {
87; CHECK-LABEL: define i32 @test8(
88; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
89; CHECK: %y = load atomic i32, i32* %p acquire, align 4
90  %x = load atomic i32, i32* %p seq_cst, align 4
91  %y = load atomic i32, i32* %p acquire, align 4
92  %z = add i32 %x, %y
93  ret i32 %z
94}
95
96; An unordered access to null is still unreachable.  There's no
97; ordering imposed.
; The input is an unordered atomic load from null; the expected output
; contains a plain `store i32 undef, i32* null`.
98define i32 @test9() {
99; CHECK-LABEL: define i32 @test9(
100; CHECK: store i32 undef, i32* null
101  %x = load atomic i32, i32* null unordered, align 4
102  ret i32 %x
103}
104
; Same unordered atomic load from null as @test9, but the function carries
; attribute group #0 ("null-pointer-is-valid"="true" in this file).  The
; load from null is expected to remain in the output.
105define i32 @test9_no_null_opt() #0 {
106; CHECK-LABEL: define i32 @test9_no_null_opt(
107; CHECK: load atomic i32, i32* null unordered
108  %x = load atomic i32, i32* null unordered, align 4
109  ret i32 %x
110}
111
112; FIXME: Could also fold
; Monotonic atomic load from null.  The load is expected to remain in the
; output.
113define i32 @test10() {
114; CHECK-LABEL: define i32 @test10(
115; CHECK: load atomic i32, i32* null monotonic
116  %x = load atomic i32, i32* null monotonic, align 4
117  ret i32 %x
118}
119
; Monotonic atomic load from null in a function carrying attribute group #0
; ("null-pointer-is-valid"="true" in this file).  The load is expected to
; remain in the output.
120define i32 @test10_no_null_opt() #0 {
121; CHECK-LABEL: define i32 @test10_no_null_opt(
122; CHECK: load atomic i32, i32* null monotonic
123  %x = load atomic i32, i32* null monotonic, align 4
124  ret i32 %x
125}
126
127; Would this be legal to fold?  Probably?
; seq_cst atomic load from null.  The load is expected to remain in the
; output.
128define i32 @test11() {
129; CHECK-LABEL: define i32 @test11(
130; CHECK: load atomic i32, i32* null seq_cst
131  %x = load atomic i32, i32* null seq_cst, align 4
132  ret i32 %x
133}
134
; seq_cst atomic load from null in a function carrying attribute group #0
; ("null-pointer-is-valid"="true" in this file).  The load is expected to
; remain in the output.
135define i32 @test11_no_null_opt() #0 {
136; CHECK-LABEL: define i32 @test11_no_null_opt(
137; CHECK: load atomic i32, i32* null seq_cst
138  %x = load atomic i32, i32* null seq_cst, align 4
139  ret i32 %x
140}
141
142; An unordered access to null is still unreachable.  There's no
143; ordering imposed.
; The input stores the constant 0; the expected output still has an atomic
; store to null, but with undef as the stored value.
144define i32 @test12() {
145; CHECK-LABEL: define i32 @test12(
146; CHECK: store atomic i32 undef, i32* null
147  store atomic i32 0, i32* null unordered, align 4
148  ret i32 0
149}
150
; Same unordered atomic store to null as @test12, but the function carries
; attribute group #0 ("null-pointer-is-valid"="true" in this file).  The
; store of 0 is expected to remain as written.
151define i32 @test12_no_null_opt() #0 {
152; CHECK-LABEL: define i32 @test12_no_null_opt(
153; CHECK: store atomic i32 0, i32* null unordered
154  store atomic i32 0, i32* null unordered, align 4
155  ret i32 0
156}
157
158; FIXME: Could also fold
; Monotonic atomic store of 0 to null.  The store is expected to remain as
; written.
159define i32 @test13() {
160; CHECK-LABEL: define i32 @test13(
161; CHECK: store atomic i32 0, i32* null monotonic
162  store atomic i32 0, i32* null monotonic, align 4
163  ret i32 0
164}
165
; Monotonic atomic store of 0 to null in a function carrying attribute
; group #0 ("null-pointer-is-valid"="true" in this file).  The store is
; expected to remain as written.
166define i32 @test13_no_null_opt() #0 {
167; CHECK-LABEL: define i32 @test13_no_null_opt(
168; CHECK: store atomic i32 0, i32* null monotonic
169  store atomic i32 0, i32* null monotonic, align 4
170  ret i32 0
171}
172
173; Would this be legal to fold?  Probably?
; seq_cst atomic store of 0 to null.  The store is expected to remain as
; written.
174define i32 @test14() {
175; CHECK-LABEL: define i32 @test14(
176; CHECK: store atomic i32 0, i32* null seq_cst
177  store atomic i32 0, i32* null seq_cst, align 4
178  ret i32 0
179}
180
; seq_cst atomic store of 0 to null in a function carrying attribute
; group #0 ("null-pointer-is-valid"="true" in this file).  The store is
; expected to remain as written.
181define i32 @test14_no_null_opt() #0 {
182; CHECK-LABEL: define i32 @test14_no_null_opt(
183; CHECK: store atomic i32 0, i32* null seq_cst
184  store atomic i32 0, i32* null seq_cst, align 4
185  ret i32 0
186}
187
; External i32 globals used as addresses by @test15, @test16, @test17,
; @test22 and @test23 below.
188@a = external global i32
189@b = external global i32
190
; Unordered atomic load through a select between the addresses of @a and
; @b.  The expected output contains unordered atomic loads directly from
; @a and from @b.
191define i32 @test15(i1 %cnd) {
192; CHECK-LABEL: define i32 @test15(
193; CHECK: load atomic i32, i32* @a unordered, align 4
194; CHECK: load atomic i32, i32* @b unordered, align 4
195  %addr = select i1 %cnd, i32* @a, i32* @b
196  %x = load atomic i32, i32* %addr unordered, align 4
197  ret i32 %x
198}
199
200; FIXME: This would be legal to transform
; Monotonic atomic load through a select between @a and @b.  The load
; through %addr is expected to remain in the output.
201define i32 @test16(i1 %cnd) {
202; CHECK-LABEL: define i32 @test16(
203; CHECK: load atomic i32, i32* %addr monotonic, align 4
204  %addr = select i1 %cnd, i32* @a, i32* @b
205  %x = load atomic i32, i32* %addr monotonic, align 4
206  ret i32 %x
207}
208
209; FIXME: This would be legal to transform
; seq_cst atomic load through a select between @a and @b.  The load
; through %addr is expected to remain in the output.
210define i32 @test17(i1 %cnd) {
211; CHECK-LABEL: define i32 @test17(
212; CHECK: load atomic i32, i32* %addr seq_cst, align 4
213  %addr = select i1 %cnd, i32* @a, i32* @b
214  %x = load atomic i32, i32* %addr seq_cst, align 4
215  ret i32 %x
216}
217
; Unordered atomic stores of different constants to @a on the two sides of
; a conditional branch.  The expected output contains a phi of type i32
; followed by an unordered atomic store of that phi value to @a.
218define i32 @test22(i1 %cnd) {
219; CHECK-LABEL: define i32 @test22(
220; CHECK: [[PHI:%.*]] = phi i32
221; CHECK: store atomic i32 [[PHI]], i32* @a unordered, align 4
222  br i1 %cnd, label %block1, label %block2
223
224block1:
225  store atomic i32 1, i32* @a unordered, align 4
226  br label %merge
227block2:
228  store atomic i32 2, i32* @a unordered, align 4
229  br label %merge
230
231merge:
232  ret i32 0
233}
234
235; TODO: probably also legal here
; Same control-flow shape as @test22, but the stores to @a are monotonic.
; NOTE(review): the check lines only pin the conditional branch; the two
; stores themselves are not checked -- presumably they are expected to stay
; in their blocks, confirm.
236define i32 @test23(i1 %cnd) {
237; CHECK-LABEL: define i32 @test23(
238; CHECK: br i1 %cnd, label %block1, label %block2
239  br i1 %cnd, label %block1, label %block2
240
241block1:
242  store atomic i32 1, i32* @a monotonic, align 4
243  br label %merge
244block2:
245  store atomic i32 2, i32* @a monotonic, align 4
246  br label %merge
247
248merge:
249  ret i32 0
250}
251
; External function with no visible body.  @test18 and @test19 call it
; between a load and a store of the same pointer; the call sites are marked
; there as keeping the load around.
252declare void @clobber()
253
; Unordered atomic float load from %p, a call to @clobber, then an unordered
; atomic store of the loaded value back to %p.  The expected output performs
; both the load and the store as i32 accesses.
254define i32 @test18(float* %p) {
255; CHECK-LABEL: define i32 @test18(
256; CHECK: load atomic i32, i32* [[A:%.*]] unordered, align 4
257; CHECK: store atomic i32 [[B:%.*]], i32* [[C:%.*]] unordered, align 4
258  %x = load atomic float, float* %p unordered, align 4
259  call void @clobber() ;; keep the load around
260  store atomic float %x, float* %p unordered, align 4
261  ret i32 0
262}
263
264; TODO: probably also legal in this case
; Same shape as @test18 with seq_cst ordering.  The load and the store are
; expected to remain float accesses.
265define i32 @test19(float* %p) {
266; CHECK-LABEL: define i32 @test19(
267; CHECK: load atomic float, float* %p seq_cst, align 4
268; CHECK: store atomic float %x, float* %p seq_cst, align 4
269  %x = load atomic float, float* %p seq_cst, align 4
270  call void @clobber() ;; keep the load around
271  store atomic float %x, float* %p seq_cst, align 4
272  ret i32 0
273}
274
; Unordered atomic store of a pointer value obtained by bitcasting %v from
; i8* to i32*.  The expected output stores %v itself, as an i8*, through an
; i8** address.
275define i32 @test20(i32** %p, i8* %v) {
276; CHECK-LABEL: define i32 @test20(
277; CHECK: store atomic i8* %v, i8** [[D:%.*]] unordered, align 4
278  %cast = bitcast i8* %v to i32*
279  store atomic i32* %cast, i32** %p unordered, align 4
280  ret i32 0
281}
282
; Same input shape as @test20 with monotonic ordering.  The store of %cast
; through %p is expected to remain as written.
283define i32 @test21(i32** %p, i8* %v) {
284; CHECK-LABEL: define i32 @test21(
285; CHECK: store atomic i32* %cast, i32** %p monotonic, align 4
286  %cast = bitcast i8* %v to i32*
287  store atomic i32* %cast, i32** %p monotonic, align 4
288  ret i32 0
289}
290
; Plain pointer load from %p1 followed by a volatile pointer store to %p2.
; The expected output performs the load and the store as i64 accesses
; (pointers are 64 bits wide under this file's datalayout) and the store is
; still volatile.
; The label below names this function exactly, including the opening
; parenthesis, so it cannot also match @pr27490b.
291define void @pr27490a(i8** %p1, i8** %p2) {
292; CHECK-LABEL: define void @pr27490a(
293; CHECK: %1 = bitcast i8** %p1 to i64*
294; CHECK: %l1 = load i64, i64* %1, align 8
295; CHECK: %2 = bitcast i8** %p2 to i64*
296; CHECK: store volatile i64 %l1, i64* %2, align 8
297  %l = load i8*, i8** %p1
298  store volatile i8* %l, i8** %p2
299  ret void
300}
301
; Plain pointer load from %p1 followed by a seq_cst atomic pointer store to
; %p2.  The expected output performs the load and the store as i64 accesses
; (pointers are 64 bits wide under this file's datalayout) and the store is
; still a seq_cst atomic.
; The label below names this function exactly, including the opening
; parenthesis, so it cannot also match @pr27490a.
302define void @pr27490b(i8** %p1, i8** %p2) {
303; CHECK-LABEL: define void @pr27490b(
304; CHECK: %1 = bitcast i8** %p1 to i64*
305; CHECK: %l1 = load i64, i64* %1, align 8
306; CHECK: %2 = bitcast i8** %p2 to i64*
307; CHECK: store atomic i64 %l1, i64* %2 seq_cst, align 8
308  %l = load i8*, i8** %p1
309  store atomic i8* %l, i8** %p2 seq_cst, align 8
310  ret void
311}
312
313;; At the moment, we can't form atomic vectors by folding since these are
314;; not representable in the IR.  This was pr29121.  The right long term
315;; solution is to extend the IR to handle this case.
; Unordered atomic i64 load whose result is bitcast to <2 x float>.  The
; load is expected to remain an i64 atomic load rather than becoming a
; vector load.
; The label directive below needs its trailing colon; without it FileCheck
; does not treat the line as a directive, and the following check is not
; scoped to this function.
316define <2 x float> @no_atomic_vector_load(i64* %p) {
317; CHECK-LABEL: @no_atomic_vector_load
318; CHECK: load atomic i64, i64* %p unordered, align 8
319  %load = load atomic i64, i64* %p unordered, align 8
320  %.cast = bitcast i64 %load to <2 x float>
321  ret <2 x float> %.cast
322}
323
; Unordered atomic i64 store of a value bitcast from <2 x float>, through an
; address bitcast from i8*.  The store is expected to remain an i64 atomic
; store rather than becoming a vector store.
324define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
325; CHECK-LABEL: @no_atomic_vector_store
326; CHECK: store atomic i64 %1, i64* %2 unordered, align 8
327  %1 = bitcast <2 x float> %p to i64
328  %2 = bitcast i8* %p2 to i64*
329  store atomic i64 %1, i64* %2 unordered, align 8
330  ret void
331}
332
; Attribute group attached to the *_no_null_opt functions above.  In each of
; those functions the access to null is expected to remain as written.
333attributes #0 = { "null-pointer-is-valid"="true" }
334