; Codegen test for ARM/Thumb atomic operations. The same IR is compiled by
; five llc invocations, each verified under its own FileCheck prefix set:
;   armv7-apple-ios                         -> CHECK plus CHECK-ARMV7
;   thumbv7-apple-ios                       -> CHECK plus CHECK-T2
;   thumbv6-apple-ios                       -> CHECK-T1
;   thumbv6-apple-ios with -mcpu=cortex-m0  -> CHECK-T1-M0
;   thumbv7--none-eabi, -thread-model single -> CHECK-BAREMETAL
; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix CHECK-ARMV7
; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1-M0
; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; @func: 32-bit atomicrmw coverage (add, sub, and, or, xor, min, max, umin,
; umax), every operation using monotonic ordering on a stack slot.
;
; Expectations per operation, as encoded in the check lines below:
;   - CHECK prefix (armv7 / thumbv7 apple-ios): an ldrex ... strex sequence
;     with the matching ALU instruction in between (cmp for min/max variants,
;     bic and no cmp for "max with 0").
;   - T1 and T1-M0 prefixes (thumbv6): a call to ___sync_fetch_and_<op>_4.
;   - BAREMETAL prefix: the ALU instruction is present and no __sync call
;     follows it.
;
; NOTE(review): the empty side-effecting inline asm with a memory clobber
; between operations presumably acts as a barrier keeping consecutive
; operations separate - confirm.
define void @func(i32 %argc, i8** %argv) nounwind {
entry:
	%argc.addr = alloca i32		; slot for the incoming %argc
	%argv.addr = alloca i8**		; slot for the incoming %argv
	%val1 = alloca i32		; target of the first atomic add
	%val2 = alloca i32		; target of the sub/add/min/max/umin/umax operations
	%andt = alloca i32		; target of the atomic and
	%ort = alloca i32		; target of the atomic or
	%xort = alloca i32		; target of the atomic xor
	%old = alloca i32		; receives the value returned by each atomicrmw
	%temp = alloca i32		; holds the non-constant operand of the first add
	store i32 %argc, i32* %argc.addr
	store i8** %argv, i8*** %argv.addr
	store i32 0, i32* %val1
	store i32 31, i32* %val2
	store i32 3855, i32* %andt
	store i32 3855, i32* %ort
	store i32 3855, i32* %xort
	store i32 4, i32* %temp
	%tmp = load i32, i32* %temp
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: add
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_add_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_add_4
  ; CHECK-BAREMETAL: add
  ; CHECK-BAREMETAL-NOT: __sync
  %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
	store i32 %0, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: sub
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_sub_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4
  ; CHECK-BAREMETAL: sub
  ; CHECK-BAREMETAL-NOT: __sync
  %1 = atomicrmw sub i32* %val2, i32 30 monotonic
	store i32 %1, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: add
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_add_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_add_4
  ; CHECK-BAREMETAL: add
  ; CHECK-BAREMETAL-NOT: __sync
  %2 = atomicrmw add i32* %val2, i32 1 monotonic
	store i32 %2, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: sub
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_sub_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4
  ; CHECK-BAREMETAL: sub
  ; CHECK-BAREMETAL-NOT: __sync
  %3 = atomicrmw sub i32* %val2, i32 1 monotonic
	store i32 %3, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: and
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_and_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_and_4
  ; CHECK-BAREMETAL: and
  ; CHECK-BAREMETAL-NOT: __sync
  %4 = atomicrmw and i32* %andt, i32 4080 monotonic
	store i32 %4, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: or
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_or_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_or_4
  ; CHECK-BAREMETAL: or
  ; CHECK-BAREMETAL-NOT: __sync
  %5 = atomicrmw or i32* %ort, i32 4080 monotonic
	store i32 %5, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: eor
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_xor_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_xor_4
  ; CHECK-BAREMETAL: eor
  ; CHECK-BAREMETAL-NOT: __sync
  %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
	store i32 %6, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_min_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_min_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %7 = atomicrmw min i32* %val2, i32 16 monotonic
	store i32 %7, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
	; 0 - 1, i.e. -1, used as the signed-min operand.
	%neg = sub i32 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_min_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_min_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %8 = atomicrmw min i32* %val2, i32 %neg monotonic
	store i32 %8, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_max_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %9 = atomicrmw max i32* %val2, i32 1 monotonic
	store i32 %9, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; Signed max against 0: the checks expect a bic and no cmp inside the
  ; ldrex/strex sequence.
  ; CHECK: ldrex
  ; CHECK: bic
  ; CHECK-NOT: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_max_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
  ; CHECK-BAREMETAL: bic
  ; CHECK-BAREMETAL-NOT: __sync
  %10 = atomicrmw max i32* %val2, i32 0 monotonic
	store i32 %10, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
	store i32 %11, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
	; 0 - 1, i.e. all bits set, used as the unsigned-min operand.
	%uneg = sub i32 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
	store i32 %12, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
	store i32 %13, i32* %old
	call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_4
  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
	store i32 %14, i32* %old

  ret void
}

; @func2: 16-bit (i16) monotonic atomicrmw umin/umax on a stack slot.
; The CHECK prefix expects ldrex / cmp / strex for each operation, the T1 and
; T1-M0 prefixes expect calls to the ___sync_fetch_and_u{min,max}_2 helpers,
; and the BAREMETAL prefix expects a cmp with no __sync call after it.
define void @func2() nounwind {
entry:
  %val = alloca i16
  %old = alloca i16
  store i16 31, i16* %val
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_2
  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %0 = atomicrmw umin i16* %val, i16 16 monotonic
  store i16 %0, i16* %old
  ; 0 - 1, i.e. all bits set, used as the unsigned-min operand.
  %uneg = sub i16 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_2
  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
  store i16 %1, i16* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_2
  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %2 = atomicrmw umax i16* %val, i16 1 monotonic
  store i16 %2, i16* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_2
  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %3 = atomicrmw umax i16* %val, i16 0 monotonic
  store i16 %3, i16* %old
  ret void
}

; @func3: 8-bit (i8) monotonic atomicrmw umin/umax on a stack slot.
; The CHECK prefix expects ldrex / cmp / strex for each operation, the T1 and
; T1-M0 prefixes expect calls to the ___sync_fetch_and_u{min,max}_1 helpers,
; and the BAREMETAL prefix expects a cmp with no __sync call after it.
define void @func3() nounwind {
entry:
  %val = alloca i8
  %old = alloca i8
  store i8 31, i8* %val
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_1
  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %0 = atomicrmw umin i8* %val, i8 16 monotonic
  store i8 %0, i8* %old
  ; 0 - 1, i.e. all bits set, used as the unsigned-min operand.
  %uneg = sub i8 0, 1
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umin_1
  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
  store i8 %1, i8* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_1
  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %2 = atomicrmw umax i8* %val, i8 1 monotonic
  store i8 %2, i8* %old
  ; CHECK: ldrex
  ; CHECK: cmp
  ; CHECK: strex
  ; CHECK-T1: bl ___sync_fetch_and_umax_1
  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1
  ; CHECK-BAREMETAL: cmp
  ; CHECK-BAREMETAL-NOT: __sync
  %3 = atomicrmw umax i8* %val, i8 0 monotonic
  store i8 %3, i8* %old
  ret void
}

; @func4: a single monotonic atomic add of 1 through a pointer argument,
; returning the value produced by the atomicrmw. Under the CHECK prefix the
; output after the "func4" match must not mention r4 (the check below is
; bounded by the next label directive in the file).
; CHECK: func4
; This function should not need to use callee-saved registers.
; rdar://problem/12203728
; CHECK-NOT: r4
define i32 @func4(i32* %p) nounwind optsize ssp {
entry:
  %0 = atomicrmw add i32* %p, i32 1 monotonic
  ret i32 %0
}

; @test_cmpxchg_fail_order: cmpxchg with seq_cst success ordering and
; monotonic failure ordering; returns the loaded old value.
; The ARMV7 checks expect: an initial ldrex/cmp, a "dmb ish" before the strex
; loop, a failure block that executes clrex then returns, and a success block
; that executes "dmb ish" then returns. The T2 checks expect the same shape
; with the success path predicated (dmbeq / bxeq) and a trailing clrex.
define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
; CHECK-LABEL: test_cmpxchg_fail_order:

  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
  %oldval = extractvalue { i32, i1 } %pair, 0
; CHECK-ARMV7:     mov     r[[ADDR:[0-9]+]], r0
; CHECK-ARMV7:     ldrex   [[OLDVAL:r[0-9]+]], [r0]
; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
; CHECK-ARMV7:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK-ARMV7:     dmb ish
; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
; CHECK-ARMV7:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-ARMV7:     cmp     [[SUCCESS]], #0
; CHECK-ARMV7:     beq     [[SUCCESS_BB:\.?LBB.*]]
; CHECK-ARMV7:     ldrex   [[OLDVAL]], [r[[ADDR]]]
; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
; CHECK-ARMV7:     beq     [[LOOP_BB]]
; CHECK-ARMV7: [[FAIL_BB]]:
; CHECK-ARMV7:     clrex
; CHECK-ARMV7:     bx      lr
; CHECK-ARMV7: [[SUCCESS_BB]]:
; CHECK-ARMV7:     dmb     ish
; CHECK-ARMV7:     bx      lr

; CHECK-T2:     mov     r[[ADDR:[0-9]+]], r0
; CHECK-T2:     ldrex   [[OLDVAL:r[0-9]+]], [r0]
; CHECK-T2:     cmp     [[OLDVAL]], r1
; CHECK-T2:     bne     [[FAIL_BB:\.?LBB.*]]
; CHECK-T2:     dmb ish
; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
; CHECK-T2:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-T2:     cmp     [[SUCCESS]], #0
; CHECK-T2:     dmbeq   ish
; CHECK-T2:     bxeq    lr
; CHECK-T2:     ldrex   [[OLDVAL]], [r[[ADDR]]]
; CHECK-T2:     cmp     [[OLDVAL]], r1
; CHECK-T2:     beq     [[LOOP_BB]]
; CHECK-T2:     clrex

  ret i32 %oldval
}

; @test_cmpxchg_fail_order1: cmpxchg with acquire ordering on both the success
; and failure paths; returns the loaded old value.
; The checks expect no "dmb ish" before the ldrex/strex loop, a "dmb ish"
; after a successful strex, and, on the failure block, clrex immediately
; followed later by a "dmb ish" before returning.
define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
; CHECK-LABEL: test_cmpxchg_fail_order1:

  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
  %oldval = extractvalue { i32, i1 } %pair, 0
; CHECK-NOT:     dmb ish
; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK:     cmp     [[OLDVAL]], r1
; CHECK:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK:     cmp     [[SUCCESS]], #0
; CHECK:     bne     [[LOOP_BB]]
; CHECK:     dmb     ish
; CHECK:     bx      lr
; CHECK: [[FAIL_BB]]:
; CHECK-NEXT: clrex
; CHECK:     dmb     ish
; CHECK:     bx      lr

  ret i32 %oldval
}

; @load_load_add_acquire: two acquire atomic i32 loads whose results are added
; and returned.
; Expectations encoded below: the CHECK and T1-M0 prefixes expect each ldr to
; be followed by a dmb; the T1 prefix expects two references to
; ___sync_val_compare_and_swap_4; the BAREMETAL prefix expects plain ldr
; instructions with no dmb between or after them before the add.
define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
; CHECK-LABEL: load_load_add_acquire
  %val1 = load atomic i32, i32* %mem1 acquire, align 4
  %val2 = load atomic i32, i32* %mem2 acquire, align 4
  %tmp = add i32 %val1, %val2

; CHECK: ldr {{r[0-9]}}, [r0]
; CHECK: dmb
; CHECK: ldr {{r[0-9]}}, [r1]
; CHECK: dmb
; CHECK: add r0,

; CHECK-T1-M0: ldr {{r[0-9]}}, [r0]
; CHECK-T1-M0: dmb
; CHECK-T1-M0: ldr {{r[0-9]}}, [r1]
; CHECK-T1-M0: dmb

; CHECK-T1: ___sync_val_compare_and_swap_4
; CHECK-T1: ___sync_val_compare_and_swap_4

; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r1]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: add r0,

  ret i32 %tmp
}

; @store_store_release: two release atomic i32 stores to distinct pointers.
; Expectations encoded below: the CHECK and T1-M0 prefixes expect a dmb ahead
; of each str; the T1 prefix expects two references to
; ___sync_lock_test_and_set; the BAREMETAL prefix expects plain str
; instructions with no dmb before either of them.
define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
; CHECK-LABEL: store_store_release
  store atomic i32 %val1, i32* %mem1 release, align 4
  store atomic i32 %val2, i32* %mem2 release, align 4

; CHECK: dmb
; CHECK: str r1, [r0]
; CHECK: dmb
; CHECK: str r3, [r2]

; CHECK-T1: ___sync_lock_test_and_set
; CHECK-T1: ___sync_lock_test_and_set

; CHECK-T1-M0: dmb
; CHECK-T1-M0: str r1, [r0]
; CHECK-T1-M0: dmb
; CHECK-T1-M0: str r3, [r2]

; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: str r1, [r0]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: str r3, [r2]

  ret void
}

; @load_fence_store_monotonic: a monotonic atomic load, a seq_cst fence, then a
; monotonic atomic store of the loaded value to a second pointer.
; Expectations encoded below: the CHECK and T1-M0 prefixes expect ldr, dmb,
; str of the same register; the T1 prefix accepts either a dmb or a call to
; ___sync_synchronize between the ldr and str; the BAREMETAL prefix expects no
; dmb between the ldr and str.
define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
; CHECK-LABEL: load_fence_store_monotonic
  %val = load atomic i32, i32* %mem1 monotonic, align 4
  fence seq_cst
  store atomic i32 %val, i32* %mem2 monotonic, align 4

; CHECK: ldr [[R0:r[0-9]]], [r0]
; CHECK: dmb
; CHECK: str [[R0]], [r1]

; CHECK-T1-M0: ldr [[R0:r[0-9]]], [r0]
; CHECK-T1-M0: dmb
; CHECK-T1-M0: str [[R0]], [r1]

; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}]
; CHECK-T1: {{dmb|bl ___sync_synchronize}}
; CHECK-T1: str [[R0]], [{{r[0-9]+}}]

; CHECK-BAREMETAL: ldr [[R0:r[0-9]]], [r0]
; CHECK-BAREMETAL-NOT: dmb
; CHECK-BAREMETAL: str [[R0]], [r1]

  ret void
}
