; Test driver: the file is compiled with llc five times and each output is
; verified by FileCheck under a different set of check prefixes:
;   * armv7-apple-ios                       -> CHECK and CHECK-ARMV7
;   * thumbv7-apple-ios                     -> CHECK and CHECK-T2
;   * thumbv6-apple-ios (default CPU)       -> CHECK-T1
;   * thumbv6-apple-ios with cortex-m0      -> CHECK-T1
;   * thumbv7--none-eabi, single-threaded   -> CHECK-BAREMETAL
; Every invocation also passes -verify-machineinstrs.
1; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix CHECK-ARMV7
2; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
3; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
4; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1
5; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL
6
; Data layout shared by all functions below: little-endian with 32-bit pointers.
7target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
8
; @func: 32-bit monotonic atomicrmw coverage on stack slots.
;
; Issues add, sub, and, or, xor, min, max, umin and umax read-modify-write
; operations in sequence, storing each returned old value into %old.
; Expectations per operation:
;   * ARMv7 / Thumb2 runs: an ldrex, the operation (or a cmp for the min/max
;     family), then a strex.
;   * Thumb1 runs: a "bl" to the matching ___sync_fetch_and_<op>_4 helper.
;   * bare-metal single-threaded run: the plain operation and no reference
;     to any __sync symbol.
9define void @func(i32 %argc, i8** %argv) nounwind {
10entry:
11	%argc.addr = alloca i32		; <i32*> [#uses=1]
12	%argv.addr = alloca i8**		; <i8***> [#uses=1]
13	%val1 = alloca i32		; <i32*> [#uses=2]
14	%val2 = alloca i32		; <i32*> [#uses=15]
15	%andt = alloca i32		; <i32*> [#uses=2]
16	%ort = alloca i32		; <i32*> [#uses=2]
17	%xort = alloca i32		; <i32*> [#uses=2]
18	%old = alloca i32		; <i32*> [#uses=18]
19	%temp = alloca i32		; <i32*> [#uses=2]
20	store i32 %argc, i32* %argc.addr
21	store i8** %argv, i8*** %argv.addr
22	store i32 0, i32* %val1
23	store i32 31, i32* %val2
24	store i32 3855, i32* %andt
25	store i32 3855, i32* %ort
26	store i32 3855, i32* %xort
27	store i32 4, i32* %temp
28	%tmp = load i32, i32* %temp
29  ; CHECK: ldrex
30  ; CHECK: add
31  ; CHECK: strex
32  ; CHECK-T1: bl ___sync_fetch_and_add_4
33  ; CHECK-BAREMETAL: add
34  ; CHECK-BAREMETAL-NOT: __sync
35  %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
36	store i32 %0, i32* %old
37  ; CHECK: ldrex
38  ; CHECK: sub
39  ; CHECK: strex
40  ; CHECK-T1: bl ___sync_fetch_and_sub_4
41  ; CHECK-BAREMETAL: sub
42  ; CHECK-BAREMETAL-NOT: __sync
43  %1 = atomicrmw sub i32* %val2, i32 30 monotonic
44	store i32 %1, i32* %old
45  ; CHECK: ldrex
46  ; CHECK: add
47  ; CHECK: strex
48  ; CHECK-T1: bl ___sync_fetch_and_add_4
49  ; CHECK-BAREMETAL: add
50  ; CHECK-BAREMETAL-NOT: __sync
51  %2 = atomicrmw add i32* %val2, i32 1 monotonic
52	store i32 %2, i32* %old
53  ; CHECK: ldrex
54  ; CHECK: sub
55  ; CHECK: strex
56  ; CHECK-T1: bl ___sync_fetch_and_sub_4
57  ; CHECK-BAREMETAL: sub
58  ; CHECK-BAREMETAL-NOT: __sync
59  %3 = atomicrmw sub i32* %val2, i32 1 monotonic
60	store i32 %3, i32* %old
61  ; CHECK: ldrex
62  ; CHECK: and
63  ; CHECK: strex
64  ; CHECK-T1: bl ___sync_fetch_and_and_4
65  ; CHECK-BAREMETAL: and
66  ; CHECK-BAREMETAL-NOT: __sync
67  %4 = atomicrmw and i32* %andt, i32 4080 monotonic
68	store i32 %4, i32* %old
69  ; CHECK: ldrex
70  ; CHECK: or
71  ; CHECK: strex
72  ; CHECK-T1: bl ___sync_fetch_and_or_4
73  ; CHECK-BAREMETAL: or
74  ; CHECK-BAREMETAL-NOT: __sync
75  %5 = atomicrmw or i32* %ort, i32 4080 monotonic
76	store i32 %5, i32* %old
77  ; CHECK: ldrex
78  ; CHECK: eor
79  ; CHECK: strex
80  ; CHECK-T1: bl ___sync_fetch_and_xor_4
81  ; CHECK-BAREMETAL: eor
82  ; CHECK-BAREMETAL-NOT: __sync
83  %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
84	store i32 %6, i32* %old
85  ; CHECK: ldrex
86  ; CHECK: cmp
87  ; CHECK: strex
88  ; CHECK-T1: bl ___sync_fetch_and_min_4
89  ; CHECK-BAREMETAL: cmp
90  ; CHECK-BAREMETAL-NOT: __sync
91  %7 = atomicrmw min i32* %val2, i32 16 monotonic
92	store i32 %7, i32* %old
; 0 - 1, i.e. -1: the operand for the second signed min below.
93	%neg = sub i32 0, 1
94  ; CHECK: ldrex
95  ; CHECK: cmp
96  ; CHECK: strex
97  ; CHECK-T1: bl ___sync_fetch_and_min_4
98  ; CHECK-BAREMETAL: cmp
99  ; CHECK-BAREMETAL-NOT: __sync
100  %8 = atomicrmw min i32* %val2, i32 %neg monotonic
101	store i32 %8, i32* %old
102  ; CHECK: ldrex
103  ; CHECK: cmp
104  ; CHECK: strex
105  ; CHECK-T1: bl ___sync_fetch_and_max_4
106  ; CHECK-BAREMETAL: cmp
107  ; CHECK-BAREMETAL-NOT: __sync
108  %9 = atomicrmw max i32* %val2, i32 1 monotonic
109	store i32 %9, i32* %old
110  ; CHECK: ldrex
111  ; CHECK: cmp
112  ; CHECK: strex
113  ; CHECK-T1: bl ___sync_fetch_and_max_4
114  ; CHECK-BAREMETAL: cmp
115  ; CHECK-BAREMETAL-NOT: __sync
116  %10 = atomicrmw max i32* %val2, i32 0 monotonic
117	store i32 %10, i32* %old
118  ; CHECK: ldrex
119  ; CHECK: cmp
120  ; CHECK: strex
121  ; CHECK-T1: bl ___sync_fetch_and_umin_4
122  ; CHECK-BAREMETAL: cmp
123  ; CHECK-BAREMETAL-NOT: __sync
124  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
125	store i32 %11, i32* %old
; 0 - 1 again; as an unsigned operand this is the all-ones 32-bit value.
126	%uneg = sub i32 0, 1
127  ; CHECK: ldrex
128  ; CHECK: cmp
129  ; CHECK: strex
130  ; CHECK-T1: bl ___sync_fetch_and_umin_4
131  ; CHECK-BAREMETAL: cmp
132  ; CHECK-BAREMETAL-NOT: __sync
133  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
134	store i32 %12, i32* %old
135  ; CHECK: ldrex
136  ; CHECK: cmp
137  ; CHECK: strex
138  ; CHECK-T1: bl ___sync_fetch_and_umax_4
139  ; CHECK-BAREMETAL: cmp
140  ; CHECK-BAREMETAL-NOT: __sync
141  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
142	store i32 %13, i32* %old
143  ; CHECK: ldrex
144  ; CHECK: cmp
145  ; CHECK: strex
146  ; CHECK-T1: bl ___sync_fetch_and_umax_4
147  ; CHECK-BAREMETAL: cmp
148  ; CHECK-BAREMETAL-NOT: __sync
149  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
150	store i32 %14, i32* %old
151
152  ret void
153}
154
; @func2: 16-bit monotonic unsigned min/max atomicrmw on a stack slot.
;
; Performs umin twice and umax twice on an i16 initialised to 31, storing
; each returned old value into %old. The ARMv7 / Thumb2 runs expect an
; ldrex, cmp, strex sequence per operation; the Thumb1 runs expect a call to
; ___sync_fetch_and_umin_2 or ___sync_fetch_and_umax_2; the bare-metal run
; expects a cmp and no __sync symbol.
155define void @func2() nounwind {
156entry:
157  %val = alloca i16
158  %old = alloca i16
159  store i16 31, i16* %val
160  ; CHECK: ldrex
161  ; CHECK: cmp
162  ; CHECK: strex
163  ; CHECK-T1: bl ___sync_fetch_and_umin_2
164  ; CHECK-BAREMETAL: cmp
165  ; CHECK-BAREMETAL-NOT: __sync
166  %0 = atomicrmw umin i16* %val, i16 16 monotonic
167  store i16 %0, i16* %old
; 0 - 1: the all-ones i16 value, used as the unsigned min operand below.
168  %uneg = sub i16 0, 1
169  ; CHECK: ldrex
170  ; CHECK: cmp
171  ; CHECK: strex
172  ; CHECK-T1: bl ___sync_fetch_and_umin_2
173  ; CHECK-BAREMETAL: cmp
174  ; CHECK-BAREMETAL-NOT: __sync
175  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
176  store i16 %1, i16* %old
177  ; CHECK: ldrex
178  ; CHECK: cmp
179  ; CHECK: strex
180  ; CHECK-T1: bl ___sync_fetch_and_umax_2
181  ; CHECK-BAREMETAL: cmp
182  ; CHECK-BAREMETAL-NOT: __sync
183  %2 = atomicrmw umax i16* %val, i16 1 monotonic
184  store i16 %2, i16* %old
185  ; CHECK: ldrex
186  ; CHECK: cmp
187  ; CHECK: strex
188  ; CHECK-T1: bl ___sync_fetch_and_umax_2
189  ; CHECK-BAREMETAL: cmp
190  ; CHECK-BAREMETAL-NOT: __sync
191  %3 = atomicrmw umax i16* %val, i16 0 monotonic
192  store i16 %3, i16* %old
193  ret void
194}
195
; @func3: 8-bit monotonic unsigned min/max atomicrmw on a stack slot.
;
; Same shape as the 16-bit variant: umin twice and umax twice on an i8
; initialised to 31, each old value stored into %old. The ARMv7 / Thumb2
; runs expect ldrex, cmp, strex per operation; the Thumb1 runs expect a call
; to ___sync_fetch_and_umin_1 or ___sync_fetch_and_umax_1; the bare-metal
; run expects a cmp and no __sync symbol.
196define void @func3() nounwind {
197entry:
198  %val = alloca i8
199  %old = alloca i8
200  store i8 31, i8* %val
201  ; CHECK: ldrex
202  ; CHECK: cmp
203  ; CHECK: strex
204  ; CHECK-T1: bl ___sync_fetch_and_umin_1
205  ; CHECK-BAREMETAL: cmp
206  ; CHECK-BAREMETAL-NOT: __sync
207  %0 = atomicrmw umin i8* %val, i8 16 monotonic
208  store i8 %0, i8* %old
209  ; CHECK: ldrex
210  ; CHECK: cmp
211  ; CHECK: strex
212  ; CHECK-T1: bl ___sync_fetch_and_umin_1
213  ; CHECK-BAREMETAL: cmp
214  ; CHECK-BAREMETAL-NOT: __sync
; 0 - 1: the all-ones i8 value, used as the unsigned min operand on the
; next line (the expectations just above apply to that atomicrmw).
215  %uneg = sub i8 0, 1
216  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
217  store i8 %1, i8* %old
218  ; CHECK: ldrex
219  ; CHECK: cmp
220  ; CHECK: strex
221  ; CHECK-T1: bl ___sync_fetch_and_umax_1
222  ; CHECK-BAREMETAL: cmp
223  ; CHECK-BAREMETAL-NOT: __sync
224  %2 = atomicrmw umax i8* %val, i8 1 monotonic
225  store i8 %2, i8* %old
226  ; CHECK: ldrex
227  ; CHECK: cmp
228  ; CHECK: strex
229  ; CHECK-T1: bl ___sync_fetch_and_umax_1
230  ; CHECK-BAREMETAL: cmp
231  ; CHECK-BAREMETAL-NOT: __sync
232  %3 = atomicrmw umax i8* %val, i8 0 monotonic
233  store i8 %3, i8* %old
234  ret void
235}
236
237; CHECK: func4
238; This function should not need to use callee-saved registers.
239; rdar://problem/12203728
; The negative match below therefore rejects any mention of r4 between the
; "func4" match above and the next positive match for the ARMv7 / Thumb2
; runs. The function itself is a single monotonic atomic add of 1 through
; the pointer argument, returning the previous value.
240; CHECK-NOT: r4
241define i32 @func4(i32* %p) nounwind optsize ssp {
242entry:
243  %0 = atomicrmw add i32* %p, i32 1 monotonic
244  ret i32 %0
245}
246
; @test_cmpxchg_fail_order: cmpxchg with seq_cst success ordering and
; monotonic failure ordering; returns the loaded old value.
;
; ARMv7 expectations: an initial ldrex/cmp that branches straight to the
; failure block on mismatch, a "dmb ish" before the store loop, a strex
; retry loop that re-loads and re-compares, a failure block that executes
; clrex and returns, and a success block that executes "dmb ish" and returns.
; Thumb2 expectations: the same load/compare/loop structure, but the success
; path is predicated (dmbeq / bxeq) instead of branching, followed by clrex.
247define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
248; CHECK-LABEL: test_cmpxchg_fail_order:
249
250  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
251  %oldval = extractvalue { i32, i1 } %pair, 0
252; CHECK-ARMV7:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
253; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
254; CHECK-ARMV7:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
255; CHECK-ARMV7:     dmb ish
256; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
257; CHECK-ARMV7:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
258; CHECK-ARMV7:     cmp     [[SUCCESS]], #0
259; CHECK-ARMV7:     beq     [[SUCCESS_BB:\.?LBB.*]]
260; CHECK-ARMV7:     ldrex   [[OLDVAL]], [r[[ADDR]]]
261; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
262; CHECK-ARMV7:     beq     [[LOOP_BB]]
263; CHECK-ARMV7: [[FAIL_BB]]:
264; CHECK-ARMV7:     clrex
265; CHECK-ARMV7:     bx      lr
266; CHECK-ARMV7: [[SUCCESS_BB]]:
267; CHECK-ARMV7:     dmb     ish
268; CHECK-ARMV7:     bx      lr
269
270; CHECK-T2:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
271; CHECK-T2:     cmp     [[OLDVAL]], r1
272; CHECK-T2:     bne     [[FAIL_BB:\.?LBB.*]]
273; CHECK-T2:     dmb ish
274; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
275; CHECK-T2:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
276; CHECK-T2:     cmp     [[SUCCESS]], #0
277; CHECK-T2:     dmbeq   ish
278; CHECK-T2:     bxeq    lr
279; CHECK-T2:     ldrex   [[OLDVAL]], [r[[ADDR]]]
280; CHECK-T2:     cmp     [[OLDVAL]], r1
281; CHECK-T2:     beq     [[LOOP_BB]]
282; CHECK-T2:     clrex
283
284  ret i32 %oldval
285}
286
; @test_cmpxchg_fail_order1: cmpxchg with acquire ordering on both the
; success and the failure path; returns the loaded old value.
;
; Expectations for the ARMv7 / Thumb2 runs: no "dmb ish" before the loop, an
; ldrex/cmp/strex retry loop, a failure block that executes clrex and is
; immediately followed by the common end block, and a "dmb ish" in that end
; block before the return.
287define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
288; CHECK-LABEL: test_cmpxchg_fail_order1:
289
290  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
291  %oldval = extractvalue { i32, i1 } %pair, 0
292; CHECK-NOT:     dmb ish
293; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
294; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
295; CHECK:     cmp     [[OLDVAL]], r1
296; CHECK:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
297; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
298; CHECK:     cmp     [[SUCCESS]], #0
299; CHECK:     bne     [[LOOP_BB]]
300; CHECK:     b       [[END_BB:\.?LBB[0-9]+_[0-9]+]]
301; CHECK: [[FAIL_BB]]:
302; CHECK-NEXT: clrex
303; CHECK-NEXT: [[END_BB]]:
304; CHECK:     dmb     ish
305; CHECK:     bx      lr
306
307  ret i32 %oldval
308}
309
; @load_load_add_acquire: two acquire atomic loads whose results are added
; and returned.
;
; Expectations: the ARMv7 / Thumb2 runs want each ldr followed by a dmb and
; then the add into r0; the Thumb1 runs want two references to
; ___sync_val_compare_and_swap_4; the bare-metal single-threaded run wants
; the same two loads and the add with no dmb in between.
310define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
311; CHECK-LABEL: load_load_add_acquire
312  %val1 = load atomic i32, i32* %mem1 acquire, align 4
313  %val2 = load atomic i32, i32* %mem2 acquire, align 4
314  %tmp = add i32 %val1, %val2
315
316; CHECK: ldr {{r[0-9]}}, [r0]
317; CHECK: dmb
318; CHECK: ldr {{r[0-9]}}, [r1]
319; CHECK: dmb
320; CHECK: add r0,
321
322; CHECK-T1: ___sync_val_compare_and_swap_4
323; CHECK-T1: ___sync_val_compare_and_swap_4
324
325; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0]
326; CHECK-BAREMETAL-NOT: dmb
327; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r1]
328; CHECK-BAREMETAL-NOT: dmb
329; CHECK-BAREMETAL: add r0,
330
331  ret i32 %tmp
332}
333
; @store_store_release: two consecutive release atomic stores.
;
; Expectations: the ARMv7 / Thumb2 runs want a dmb before each str; the
; Thumb1 runs want two references to ___sync_lock_test_and_set; the
; bare-metal single-threaded run wants both plain str instructions with no
; dmb before either of them. The bare-metal str expectations use the same
; prefix spelling as the bare-metal run's FileCheck invocation so that they
; are actually enforced.
334define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
335; CHECK-LABEL: store_store_release
336  store atomic i32 %val1, i32* %mem1 release, align 4
337  store atomic i32 %val2, i32* %mem2 release, align 4
338
339; CHECK: dmb
340; CHECK: str r1, [r0]
341; CHECK: dmb
342; CHECK: str r3, [r2]
343
344; CHECK-T1: ___sync_lock_test_and_set
345; CHECK-T1: ___sync_lock_test_and_set
346
347; CHECK-BAREMETAL-NOT: dmb
348; CHECK-BAREMETAL: str r1, [r0]
349; CHECK-BAREMETAL-NOT: dmb
350; CHECK-BAREMETAL: str r3, [r2]
351
352  ret void
353}
354
; @load_fence_store_monotonic: a monotonic atomic load, a seq_cst fence, and
; a monotonic atomic store of the loaded value.
;
; Expectations: the ARMv7 / Thumb2 runs want ldr, dmb, then str of the same
; register; the Thumb1 runs accept either a dmb or a call to
; ___sync_synchronize between the load and the store; the bare-metal
; single-threaded run wants the load and store with no dmb in between.
355define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
356; CHECK-LABEL: load_fence_store_monotonic
357  %val = load atomic i32, i32* %mem1 monotonic, align 4
358  fence seq_cst
359  store atomic i32 %val, i32* %mem2 monotonic, align 4
360
361; CHECK: ldr [[R0:r[0-9]]], [r0]
362; CHECK: dmb
363; CHECK: str [[R0]], [r1]
364
365; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}]
366; CHECK-T1: {{dmb|bl ___sync_synchronize}}
367; CHECK-T1: str [[R0]], [{{r[0-9]+}}]
368
369; CHECK-BAREMETAL: ldr [[R0:r[0-9]]], [r0]
370; CHECK-BAREMETAL-NOT: dmb
371; CHECK-BAREMETAL: str [[R0]], [r1]
372
373  ret void
374}
375