1; This tests each of the supported NaCl atomic instructions for every
2; size allowed.
3
4; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
5; RUN:   -allow-externally-defined-symbols | FileCheck %s
6; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
7; RUN:   -allow-externally-defined-symbols | FileCheck --check-prefix=O2 %s
8; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \
9; RUN:   -allow-externally-defined-symbols | FileCheck %s
10
11; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
12; RUN:   --target arm32 -i %s --args -O2 \
13; RUN:   -allow-externally-defined-symbols \
14; RUN:   | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \
15; RUN:   --check-prefix=ARM32
16
17; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
18; RUN:   --target arm32 -i %s --args -O2 \
19; RUN:   -allow-externally-defined-symbols \
20; RUN:   | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \
21; RUN:   --check-prefix=ARM32O2
22
23; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \
24; RUN:   --target arm32 -i %s --args -Om1 \
25; RUN:   -allow-externally-defined-symbols \
26; RUN:   | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \
27; RUN:   --check-prefix=ARM32
28
29; RUN: %if --need=allow_dump --need=target_MIPS32 --command %p2i --filetype=asm\
30; RUN:   --target mips32 -i %s --args -O2 \
31; RUN:   -allow-externally-defined-symbols \
32; RUN:   | %if --need=allow_dump --need=target_MIPS32 --command FileCheck %s \
33; RUN:   --check-prefix=MIPS32O2 --check-prefix=MIPS32
34
35; RUN: %if --need=allow_dump --need=target_MIPS32 --command %p2i --filetype=asm\
36; RUN:   --target mips32 -i %s --args -Om1 \
37; RUN:   -allow-externally-defined-symbols \
38; RUN:   | %if --need=allow_dump --need=target_MIPS32 --command FileCheck %s \
39; RUN:   --check-prefix=MIPS32OM1 --check-prefix=MIPS32
40
41declare i8 @llvm.nacl.atomic.load.i8(i8*, i32)
42declare i16 @llvm.nacl.atomic.load.i16(i16*, i32)
43declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
44declare i64 @llvm.nacl.atomic.load.i64(i64*, i32)
45declare void @llvm.nacl.atomic.store.i8(i8, i8*, i32)
46declare void @llvm.nacl.atomic.store.i16(i16, i16*, i32)
47declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
48declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32)
49declare i8 @llvm.nacl.atomic.rmw.i8(i32, i8*, i8, i32)
50declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32)
51declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32)
52declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32)
53declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32)
54declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32)
55declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32)
56declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32)
57declare void @llvm.nacl.atomic.fence(i32)
58declare void @llvm.nacl.atomic.fence.all()
59declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*)
60
61@SzGlobal8 = internal global [1 x i8] zeroinitializer, align 1
62@SzGlobal16 = internal global [2 x i8] zeroinitializer, align 2
63@SzGlobal32 = internal global [4 x i8] zeroinitializer, align 4
64@SzGlobal64 = internal global [8 x i8] zeroinitializer, align 8
65
; NOTE: The LLC equivalents for 16-bit atomic operations are expanded
; as 32-bit operations. For Subzero, assume that real 16-bit operations
; will be usable (the validator will be fixed):
69; https://code.google.com/p/nativeclient/issues/detail?id=2981
70
71;;; Load
72
73; x86 guarantees load/store to be atomic if naturally aligned.
74; The PNaCl IR requires all atomic accesses to be naturally aligned.
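; A seq_cst atomic load on x86 therefore lowers to a plain mov with no lock
; prefix or fence (see the CHECK lines below); only the seq_cst stores later
; in this file add a trailing mfence.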
75
76define internal i32 @test_atomic_load_8(i32 %iptr) {
77entry:
78  %ptr = inttoptr i32 %iptr to i8*
79  ; parameter value "6" is for the sequential consistency memory order.
80  %i = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6)
81  %i2 = sub i8 %i, 0
82  %r = zext i8 %i2 to i32
83  ret i32 %r
84}
85; CHECK-LABEL: test_atomic_load_8
86; CHECK: mov {{.*}},DWORD
87; CHECK: mov {{.*}},BYTE
88; ARM32-LABEL: test_atomic_load_8
89; ARM32: ldrb r{{[0-9]+}}, [r{{[0-9]+}}
90; ARM32: dmb
91; MIPS32-LABEL: test_atomic_load_8
92; MIPS32: sync
93; MIPS32: ll
94; MIPS32: sc
95; MIPS32: sync
96
97define internal i32 @test_atomic_load_16(i32 %iptr) {
98entry:
99  %ptr = inttoptr i32 %iptr to i16*
100  %i = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6)
101  %i2 = sub i16 %i, 0
102  %r = zext i16 %i2 to i32
103  ret i32 %r
104}
105; CHECK-LABEL: test_atomic_load_16
106; CHECK: mov {{.*}},DWORD
107; CHECK: mov {{.*}},WORD
108; ARM32-LABEL: test_atomic_load_16
109; ARM32: ldrh r{{[0-9]+}}, [r{{[0-9]+}}
110; ARM32: dmb
111; MIPS32-LABEL: test_atomic_load_16
112; MIPS32: sync
113; MIPS32: ll
114; MIPS32: sc
115; MIPS32: sync
116
117define internal i32 @test_atomic_load_32(i32 %iptr) {
118entry:
119  %ptr = inttoptr i32 %iptr to i32*
120  %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
121  ret i32 %r
122}
123; CHECK-LABEL: test_atomic_load_32
124; CHECK: mov {{.*}},DWORD
125; CHECK: mov {{.*}},DWORD
126; ARM32-LABEL: test_atomic_load_32
127; ARM32: ldr r{{[0-9]+}}, [r{{[0-9]+}}
128; ARM32: dmb
129; MIPS32-LABEL: test_atomic_load_32
130; MIPS32: sync
131; MIPS32: ll
132; MIPS32: sc
133; MIPS32: sync
134
135define internal i64 @test_atomic_load_64(i32 %iptr) {
136entry:
137  %ptr = inttoptr i32 %iptr to i64*
138  %r = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6)
139  ret i64 %r
140}
141; CHECK-LABEL: test_atomic_load_64
142; CHECK: movq x{{.*}},QWORD
143; CHECK: movq QWORD {{.*}},x{{.*}}
144; ARM32-LABEL: test_atomic_load_64
145; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}
146; ARM32: dmb
147; MIPS32-LABEL: test_atomic_load_64
148; MIPS32: jal __sync_val_compare_and_swap_8
149; MIPS32: sync
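; MIPS32 has no 64-bit ll/sc pair, so i64 atomics are lowered to calls to the
; __sync_* runtime helpers, as checked above and in the other i64 tests.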
150
151define internal i32 @test_atomic_load_32_with_arith(i32 %iptr) {
152entry:
153  br label %next
154
155next:
156  %ptr = inttoptr i32 %iptr to i32*
157  %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
158  %r2 = sub i32 32, %r
159  ret i32 %r2
160}
161; CHECK-LABEL: test_atomic_load_32_with_arith
162; CHECK: mov {{.*}},DWORD
; The next instruction may be a separate load or folded into the sub.
164;
165; In O2 mode, we know that the load and sub are going to be fused.
166; O2-LABEL: test_atomic_load_32_with_arith
167; O2: mov {{.*}},DWORD
168; O2: sub {{.*}},DWORD
169; ARM32-LABEL: test_atomic_load_32_with_arith
170; ARM32: ldr r{{[0-9]+}}, [r{{[0-9]+}}
171; ARM32: dmb
172; MIPS32-LABEL: test_atomic_load_32_with_arith
173; MIPS32: sync
174; MIPS32: ll
175; MIPS32: sc
176; MIPS32: sync
177; MIPS32: subu
178
179define internal i32 @test_atomic_load_32_ignored(i32 %iptr) {
180entry:
181  %ptr = inttoptr i32 %iptr to i32*
182  %ignored = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
183  ret i32 0
184}
185; CHECK-LABEL: test_atomic_load_32_ignored
186; CHECK: mov {{.*}},DWORD
187; CHECK: mov {{.*}},DWORD
188; O2-LABEL: test_atomic_load_32_ignored
189; O2: mov {{.*}},DWORD
190; O2: mov {{.*}},DWORD
191; ARM32-LABEL: test_atomic_load_32_ignored
192; ARM32: ldr r{{[0-9]+}}, [r{{[0-9]+}}
193; ARM32: dmb
194; MIPS32-LABEL: test_atomic_load_32_ignored
195; MIPS32: sync
196; MIPS32: ll
197; MIPS32: sc
198; MIPS32: sync
199
200define internal i64 @test_atomic_load_64_ignored(i32 %iptr) {
201entry:
202  %ptr = inttoptr i32 %iptr to i64*
203  %ignored = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6)
204  ret i64 0
205}
206; CHECK-LABEL: test_atomic_load_64_ignored
207; CHECK: movq x{{.*}},QWORD
208; CHECK: movq QWORD {{.*}},x{{.*}}
209; ARM32-LABEL: test_atomic_load_64_ignored
210; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}
211; ARM32: dmb
212; MIPS32-LABEL: test_atomic_load_64_ignored
213; MIPS32: jal	__sync_val_compare_and_swap_8
214; MIPS32: sync
215
216;;; Store
217
218define internal void @test_atomic_store_8(i32 %iptr, i32 %v) {
219entry:
220  %truncv = trunc i32 %v to i8
221  %ptr = inttoptr i32 %iptr to i8*
222  call void @llvm.nacl.atomic.store.i8(i8 %truncv, i8* %ptr, i32 6)
223  ret void
224}
225; CHECK-LABEL: test_atomic_store_8
226; CHECK: mov BYTE
227; CHECK: mfence
228; ARM32-LABEL: test_atomic_store_8
229; ARM32: dmb
230; ARM32: strb r{{[0-9]+}}, [r{{[0-9]+}}
231; ARM32: dmb
232; MIPS32-LABEL: test_atomic_store_8
233; MIPS32: sync
234; MIPS32: ll
235; MIPS32: sc
236; MIPS32: sync
237
238define internal void @test_atomic_store_16(i32 %iptr, i32 %v) {
239entry:
240  %truncv = trunc i32 %v to i16
241  %ptr = inttoptr i32 %iptr to i16*
242  call void @llvm.nacl.atomic.store.i16(i16 %truncv, i16* %ptr, i32 6)
243  ret void
244}
245; CHECK-LABEL: test_atomic_store_16
246; CHECK: mov WORD
247; CHECK: mfence
248; ARM32-LABEL: test_atomic_store_16
249; ARM32: dmb
250; ARM32: strh r{{[0-9]+}}, [r{{[0-9]+}}
251; ARM32: dmb
252; MIPS32-LABEL: test_atomic_store_16
253; MIPS32: sync
254; MIPS32: ll
255; MIPS32: sc
256; MIPS32: sync
257
258define internal void @test_atomic_store_32(i32 %iptr, i32 %v) {
259entry:
260  %ptr = inttoptr i32 %iptr to i32*
261  call void @llvm.nacl.atomic.store.i32(i32 %v, i32* %ptr, i32 6)
262  ret void
263}
264; CHECK-LABEL: test_atomic_store_32
265; CHECK: mov DWORD
266; CHECK: mfence
267; ARM32-LABEL: test_atomic_store_32
268; ARM32: dmb
269; ARM32: str r{{[0-9]+}}, [r{{[0-9]+}}
270; ARM32: dmb
271; MIPS32-LABEL: test_atomic_store_32
272; MIPS32: sync
273; MIPS32: ll
274; MIPS32: sc
275; MIPS32: sync
276
277define internal void @test_atomic_store_64(i32 %iptr, i64 %v) {
278entry:
279  %ptr = inttoptr i32 %iptr to i64*
280  call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr, i32 6)
281  ret void
282}
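; A 64-bit store is not single-copy atomic on ARM32, so it is implemented with
; a ldrexd/strexd retry loop (checked below).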
283; CHECK-LABEL: test_atomic_store_64
284; CHECK: movq x{{.*}},QWORD
285; CHECK: movq QWORD {{.*}},x{{.*}}
286; CHECK: mfence
287; ARM32-LABEL: test_atomic_store_64
288; ARM32: dmb
289; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]]
290; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]]
291; ARM32: cmp [[S]], #0
292; ARM32: bne
293; ARM32: dmb
294; MIPS32-LABEL: test_atomic_store_64
295; MIPS32: sync
296; MIPS32: jal	__sync_lock_test_and_set_8
297; MIPS32: sync
298
299define internal void @test_atomic_store_64_const(i32 %iptr) {
300entry:
301  %ptr = inttoptr i32 %iptr to i64*
302  call void @llvm.nacl.atomic.store.i64(i64 12345678901234, i64* %ptr, i32 6)
303  ret void
304}
305; CHECK-LABEL: test_atomic_store_64_const
306; CHECK: mov {{.*}},0x73ce2ff2
307; CHECK: mov {{.*}},0xb3a
308; CHECK: movq x{{.*}},QWORD
309; CHECK: movq QWORD {{.*}},x{{.*}}
310; CHECK: mfence
311; ARM32-LABEL: test_atomic_store_64_const
312; ARM32: movw [[T0:r[0-9]+]], #12274
313; ARM32: movt [[T0]], #29646
314; ARM32: movw r{{[0-9]+}}, #2874
315; ARM32: dmb
316; ARM32: .L[[RETRY:.*]]:
317; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]]
318; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]]
319; ARM32: cmp [[S]], #0
320; ARM32: bne .L[[RETRY]]
321; ARM32: dmb
322; MIPS32-LABEL: test_atomic_store_64_const
323; MIPS32: sync
324; MIPS32: lui	{{.*}}, 29646
325; MIPS32: ori	{{.*}},{{.*}}, 12274
326; MIPS32: addiu	{{.*}}, $zero, 2874
327; MIPS32: jal	__sync_lock_test_and_set_8
328; MIPS32: sync
329
330;;; RMW
331
332;; add
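; lock xadd exchanges the register with memory and then stores the sum, so the
; register ends up holding the old value, which is what the intrinsic returns.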
333
334define internal i32 @test_atomic_rmw_add_8(i32 %iptr, i32 %v) {
335entry:
336  %trunc = trunc i32 %v to i8
337  %ptr = inttoptr i32 %iptr to i8*
338  ; "1" is an atomic add, and "6" is sequential consistency.
339  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr, i8 %trunc, i32 6)
340  %a_ext = zext i8 %a to i32
341  ret i32 %a_ext
342}
343; CHECK-LABEL: test_atomic_rmw_add_8
344; CHECK: lock xadd BYTE {{.*}},[[REG:.*]]
345; CHECK: {{mov|movzx}} {{.*}},[[REG]]
346; ARM32-LABEL: test_atomic_rmw_add_8
347; ARM32: dmb
348; ARM32: ldrexb
349; ARM32: add
350; ARM32: strexb
351; ARM32: bne
352; ARM32: dmb
353; MIPS32-LABEL: test_atomic_rmw_add_8
354; MIPS32: sync
355; MIPS32: addiu	{{.*}}, $zero, -4
356; MIPS32: and
357; MIPS32: andi	{{.*}}, {{.*}}, 3
358; MIPS32: sll	{{.*}}, {{.*}}, 3
359; MIPS32: ori	{{.*}}, $zero, 255
360; MIPS32: sllv
361; MIPS32: nor
362; MIPS32: sllv
363; MIPS32: ll
364; MIPS32: addu
365; MIPS32: and
366; MIPS32: and
367; MIPS32: or
368; MIPS32: sc
369; MIPS32: beq	{{.*}}, $zero, {{.*}}
370; MIPS32: and
371; MIPS32: srlv
372; MIPS32: sll	{{.*}}, {{.*}}, 24
373; MIPS32: sra	{{.*}}, {{.*}}, 24
374; MIPS32: sync
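; The MIPS32 sequence above performs the i8 rmw on the containing aligned
; word: ll/sc on the word, with shifts and masks to splice the new byte in and
; then extract and sign-extend the old one.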
375
376define internal i32 @test_atomic_rmw_add_16(i32 %iptr, i32 %v) {
377entry:
378  %trunc = trunc i32 %v to i16
379  %ptr = inttoptr i32 %iptr to i16*
380  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %trunc, i32 6)
381  %a_ext = zext i16 %a to i32
382  ret i32 %a_ext
383}
384; CHECK-LABEL: test_atomic_rmw_add_16
385; CHECK: lock xadd WORD {{.*}},[[REG:.*]]
386; CHECK: {{mov|movzx}} {{.*}},[[REG]]
387; ARM32-LABEL: test_atomic_rmw_add_16
388; ARM32: dmb
389; ARM32: ldrexh
390; ARM32: add
391; ARM32: strexh
392; ARM32: bne
393; ARM32: dmb
394; MIPS32-LABEL: test_atomic_rmw_add_16
395; MIPS32: sync
396; MIPS32: addiu	{{.*}}, $zero, -4
397; MIPS32: and
398; MIPS32: andi	{{.*}}, {{.*}}, 3
399; MIPS32: sll	{{.*}}, {{.*}}, 3
400; MIPS32: ori	{{.*}}, {{.*}}, 65535
401; MIPS32: sllv
402; MIPS32: nor
403; MIPS32: sllv
404; MIPS32: ll
405; MIPS32: addu
406; MIPS32: and
407; MIPS32: and
408; MIPS32: or
409; MIPS32: sc
410; MIPS32: beq	{{.*}}, $zero, {{.*}}
411; MIPS32: and
412; MIPS32: srlv
413; MIPS32: sll	{{.*}}, {{.*}}, 16
414; MIPS32: sra	{{.*}}, {{.*}}, 16
415; MIPS32: sync
416
417define internal i32 @test_atomic_rmw_add_32(i32 %iptr, i32 %v) {
418entry:
419  %ptr = inttoptr i32 %iptr to i32*
420  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
421  ret i32 %a
422}
423; CHECK-LABEL: test_atomic_rmw_add_32
424; CHECK: lock xadd DWORD {{.*}},[[REG:.*]]
425; CHECK: mov {{.*}},[[REG]]
426; ARM32-LABEL: test_atomic_rmw_add_32
427; ARM32: dmb
428; ARM32: ldrex
429; ARM32: add
430; ARM32: strex
431; ARM32: bne
432; ARM32: dmb
433; MIPS32-LABEL: test_atomic_rmw_add_32
434; MIPS32: sync
435; MIPS32: ll
436; MIPS32: addu
437; MIPS32: sc
438; MIPS32: beq	{{.*}}, $zero, {{.*}}
439; MIPS32: sync
440
441define internal i64 @test_atomic_rmw_add_64(i32 %iptr, i64 %v) {
442entry:
443  %ptr = inttoptr i32 %iptr to i64*
444  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
445  ret i64 %a
446}
447; CHECK-LABEL: test_atomic_rmw_add_64
448; CHECK: push ebx
449; CHECK: mov eax,DWORD PTR [{{.*}}]
450; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
451; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
; RHS of add cannot be any of the e[abcd]x regs because they are
; clobbered in the loop, and the RHS needs to remain live.
454; CHECK: add ebx,{{.*e.[^x]}}
455; CHECK: mov ecx,edx
456; CHECK: adc ecx,{{.*e.[^x]}}
; Ptr cannot be eax, ebx, ecx, or edx (those are used up for the expected and
; desired values). It can, however, be esi, edi, or ebp, so the pattern below
; must be careful not to reject every eb* and ed* register.
460; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
461; CHECK: jne [[LABEL]]
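; The 64-bit rmw expansion matched above is roughly the following (register
; choices are illustrative; the actual allocation varies):
;   mov eax,DWORD PTR [ptr]      ; old value, low half
;   mov edx,DWORD PTR [ptr+0x4]  ; old value, high half
; retry:
;   mov ebx,eax
;   add ebx,<v.lo>
;   mov ecx,edx
;   adc ecx,<v.hi>
;   lock cmpxchg8b QWORD PTR [ptr] ; store ecx:ebx if [ptr] still equals edx:eax
;   jne retry                      ; on failure edx:eax holds the fresh value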
462; ARM32-LABEL: test_atomic_rmw_add_64
463; ARM32: dmb
464; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
465; ARM32: adds
466; ARM32: adc
467; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
468; ARM32: bne
469; ARM32: dmb
470; MIPS32-LABEL: test_atomic_rmw_add_64
471; MIPS32: sync
472; MIPS32: jal	__sync_fetch_and_add_8
473; MIPS32: sync
474
475; Same test as above, but with a global address to test FakeUse issues.
476define internal i64 @test_atomic_rmw_add_64_global(i64 %v) {
477entry:
478  %ptr = bitcast [8 x i8]* @SzGlobal64 to i64*
479  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
480  ret i64 %a
481}
482; CHECK-LABEL: test_atomic_rmw_add_64_global
483; ARM32-LABEL: test_atomic_rmw_add_64_global
484; ARM32: dmb
485; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
486; ARM32: adds
487; ARM32: adc
488; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
489; ARM32: bne
490; ARM32: dmb
491; MIPS32-LABEL: test_atomic_rmw_add_64_global
492; MIPS32: sync
493; MIPS32: jal	__sync_fetch_and_add_8
494; MIPS32: sync
495
496; Test with some more register pressure. When we have an alloca, ebp is
497; used to manage the stack frame, so it cannot be used as a register either.
498declare void @use_ptr(i32 %iptr)
499
500define internal i64 @test_atomic_rmw_add_64_alloca(i32 %iptr, i64 %v) {
501entry:
502  br label %eblock  ; Disable alloca optimization
503eblock:
504  %alloca_ptr = alloca i8, i32 16, align 16
505  %ptr = inttoptr i32 %iptr to i64*
506  %old = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
507  store i8 0, i8* %alloca_ptr, align 1
508  store i8 1, i8* %alloca_ptr, align 1
509  store i8 2, i8* %alloca_ptr, align 1
510  store i8 3, i8* %alloca_ptr, align 1
511  %__5 = ptrtoint i8* %alloca_ptr to i32
512  call void @use_ptr(i32 %__5)
513  ret i64 %old
514}
515; CHECK-LABEL: test_atomic_rmw_add_64_alloca
516; CHECK: push ebx
517; CHECK-DAG: mov edx
518; CHECK-DAG: mov eax
519; CHECK-DAG: mov ecx
520; CHECK-DAG: mov ebx
521; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
522; It also cannot be ebp since we use that for alloca. Also make sure it's
523; not esp, since that's the stack pointer and mucking with it will break
524; the later use_ptr function call.
; That pretty much leaves esi or edi as the only viable registers.
526; CHECK: lock cmpxchg8b QWORD PTR [e{{[ds]}}i]
527; CHECK: call {{.*}} R_{{.*}} use_ptr
528; ARM32-LABEL: test_atomic_rmw_add_64_alloca
529; ARM32: dmb
530; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
531; ARM32: adds
532; ARM32: adc
533; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
534; ARM32: bne
535; ARM32: dmb
536; MIPS32-LABEL: test_atomic_rmw_add_64_alloca
537; MIPS32: sync
538; MIPS32: jal	__sync_fetch_and_add_8
539; MIPS32: sync
540
541define internal i32 @test_atomic_rmw_add_32_ignored(i32 %iptr, i32 %v) {
542entry:
543  %ptr = inttoptr i32 %iptr to i32*
544  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %v, i32 6)
545  ret i32 %v
546}
547; Technically this could use "lock add" instead of "lock xadd", if liveness
548; tells us that the destination variable is dead.
549; CHECK-LABEL: test_atomic_rmw_add_32_ignored
550; CHECK: lock xadd DWORD {{.*}},[[REG:.*]]
551; ARM32-LABEL: test_atomic_rmw_add_32_ignored
552; ARM32: dmb
553; ARM32: ldrex
554; ARM32: add
555; ARM32: strex
556; ARM32: bne
557; ARM32: dmb
558; MIPS32-LABEL: test_atomic_rmw_add_32_ignored
559; MIPS32: sync
560; MIPS32: ll
561; MIPS32: addu
562; MIPS32: sc
563; MIPS32: beq	{{.*}}, $zero, {{.*}}
564; MIPS32: sync
565
566; Atomic RMW 64 needs to be expanded into its own loop.
567; Make sure that works w/ non-trivial function bodies.
568define internal i64 @test_atomic_rmw_add_64_loop(i32 %iptr, i64 %v) {
569entry:
570  %x = icmp ult i64 %v, 100
571  br i1 %x, label %err, label %loop
572
573loop:
574  %v_next = phi i64 [ %v, %entry ], [ %next, %loop ]
575  %ptr = inttoptr i32 %iptr to i64*
576  %next = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v_next, i32 6)
577  %success = icmp eq i64 %next, 100
578  br i1 %success, label %done, label %loop
579
580done:
581  ret i64 %next
582
583err:
584  ret i64 0
585}
586; CHECK-LABEL: test_atomic_rmw_add_64_loop
587; CHECK: push ebx
588; CHECK: mov eax,DWORD PTR [{{.*}}]
589; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
590; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
591; CHECK: add ebx,{{.*e.[^x]}}
592; CHECK: mov ecx,edx
593; CHECK: adc ecx,{{.*e.[^x]}}
594; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0]
595; CHECK: jne [[LABEL]]
596; ARM32-LABEL: test_atomic_rmw_add_64_loop
597; ARM32: dmb
598; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
599; ARM32: adds
600; ARM32: adc
601; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
602; ARM32: bne
603; ARM32: dmb
604; ARM32: b
605; MIPS32-LABEL: test_atomic_rmw_add_64_loop
606; MIPS32: sync
607; MIPS32: jal	__sync_fetch_and_add_8
608; MIPS32: sync
609
610;; sub
611
612define internal i32 @test_atomic_rmw_sub_8(i32 %iptr, i32 %v) {
613entry:
614  %trunc = trunc i32 %v to i8
615  %ptr = inttoptr i32 %iptr to i8*
616  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %trunc, i32 6)
617  %a_ext = zext i8 %a to i32
618  ret i32 %a_ext
619}
620; CHECK-LABEL: test_atomic_rmw_sub_8
621; CHECK: neg [[REG:.*]]
622; CHECK: lock xadd BYTE {{.*}},[[REG]]
623; CHECK: {{mov|movzx}} {{.*}},[[REG]]
624; ARM32-LABEL: test_atomic_rmw_sub_8
625; ARM32: dmb
626; ARM32: ldrexb
627; ARM32: sub
628; ARM32: strexb
629; ARM32: bne
630; ARM32: dmb
631; MIPS32-LABEL: test_atomic_rmw_sub_8
632; MIPS32: sync
633; MIPS32: addiu	{{.*}}, $zero, -4
634; MIPS32: and
635; MIPS32: andi	{{.*}}, {{.*}}, 3
636; MIPS32: sll	{{.*}}, {{.*}}, 3
637; MIPS32: ori	{{.*}}, $zero, 255
638; MIPS32: sllv
639; MIPS32: nor
640; MIPS32: sllv
641; MIPS32: ll
642; MIPS32: subu
643; MIPS32: and
644; MIPS32: and
645; MIPS32: or
646; MIPS32: sc
647; MIPS32: beq	{{.*}}, $zero, {{.*}}
648; MIPS32: and
649; MIPS32: srlv
650; MIPS32: sll	{{.*}}, {{.*}}, 24
651; MIPS32: sra	{{.*}}, {{.*}}, 24
652; MIPS32: sync
653
654define internal i32 @test_atomic_rmw_sub_16(i32 %iptr, i32 %v) {
655entry:
656  %trunc = trunc i32 %v to i16
657  %ptr = inttoptr i32 %iptr to i16*
658  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %trunc, i32 6)
659  %a_ext = zext i16 %a to i32
660  ret i32 %a_ext
661}
662; CHECK-LABEL: test_atomic_rmw_sub_16
663; CHECK: neg [[REG:.*]]
664; CHECK: lock xadd WORD {{.*}},[[REG]]
665; CHECK: {{mov|movzx}} {{.*}},[[REG]]
666; ARM32-LABEL: test_atomic_rmw_sub_16
667; ARM32: dmb
668; ARM32: ldrexh
669; ARM32: sub
670; ARM32: strexh
671; ARM32: bne
672; ARM32: dmb
673; MIPS32-LABEL: test_atomic_rmw_sub_16
674; MIPS32: sync
675; MIPS32: addiu	{{.*}}, $zero, -4
676; MIPS32: and
677; MIPS32: andi	{{.*}}, {{.*}}, 3
678; MIPS32: sll	{{.*}}, {{.*}}, 3
679; MIPS32: ori	{{.*}}, {{.*}}, 65535
680; MIPS32: sllv
681; MIPS32: nor
682; MIPS32: sllv
683; MIPS32: ll
684; MIPS32: subu
685; MIPS32: and
686; MIPS32: and
687; MIPS32: or
688; MIPS32: sc
689; MIPS32: beq	{{.*}}, $zero, {{.*}}
690; MIPS32: and
691; MIPS32: srlv
692; MIPS32: sll	{{.*}}, {{.*}}, 16
693; MIPS32: sra	{{.*}}, {{.*}}, 16
694; MIPS32: sync
695
696define internal i32 @test_atomic_rmw_sub_32(i32 %iptr, i32 %v) {
697entry:
698  %ptr = inttoptr i32 %iptr to i32*
699  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
700  ret i32 %a
701}
702; CHECK-LABEL: test_atomic_rmw_sub_32
703; CHECK: neg [[REG:.*]]
704; CHECK: lock xadd DWORD {{.*}},[[REG]]
705; CHECK: mov {{.*}},[[REG]]
706; ARM32-LABEL: test_atomic_rmw_sub_32
707; ARM32: dmb
708; ARM32: ldrex
709; ARM32: sub
710; ARM32: strex
711; ARM32: bne
712; ARM32: dmb
713; MIPS32-LABEL: test_atomic_rmw_sub_32
714; MIPS32: sync
715; MIPS32: ll
716; MIPS32: subu
717; MIPS32: sc
718; MIPS32: beq	{{.*}}, $zero, {{.*}}
719; MIPS32: sync
720
721define internal i64 @test_atomic_rmw_sub_64(i32 %iptr, i64 %v) {
722entry:
723  %ptr = inttoptr i32 %iptr to i64*
724  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %v, i32 6)
725  ret i64 %a
726}
727; CHECK-LABEL: test_atomic_rmw_sub_64
728; CHECK: push ebx
729; CHECK: mov eax,DWORD PTR [{{.*}}]
730; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
731; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
732; CHECK: sub ebx,{{.*e.[^x]}}
733; CHECK: mov ecx,edx
734; CHECK: sbb ecx,{{.*e.[^x]}}
735; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
736; CHECK: jne [[LABEL]]
737; ARM32-LABEL: test_atomic_rmw_sub_64
738; ARM32: dmb
739; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
740; ARM32: subs
741; ARM32: sbc
742; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
743; ARM32: bne
744; ARM32: dmb
745; MIPS32-LABEL: test_atomic_rmw_sub_64
746; MIPS32: sync
747; MIPS32: jal	__sync_fetch_and_sub_8
748; MIPS32: sync
749
750define internal i32 @test_atomic_rmw_sub_32_ignored(i32 %iptr, i32 %v) {
751entry:
752  %ptr = inttoptr i32 %iptr to i32*
753  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %v, i32 6)
754  ret i32 %v
755}
; Could use "lock sub" instead of "neg; lock xadd", if liveness tells us the
; result is dead.
757; CHECK-LABEL: test_atomic_rmw_sub_32_ignored
758; CHECK: neg [[REG:.*]]
759; CHECK: lock xadd DWORD {{.*}},[[REG]]
760; ARM32-LABEL: test_atomic_rmw_sub_32_ignored
761; ARM32: dmb
762; ARM32: ldrex
763; ARM32: sub
764; ARM32: strex
765; ARM32: bne
766; ARM32: dmb
767; MIPS32-LABEL: test_atomic_rmw_sub_32_ignored
768; MIPS32: sync
769; MIPS32: ll
770; MIPS32: subu
771; MIPS32: sc
772; MIPS32: beq	{{.*}}, $zero, {{.*}}
773; MIPS32: sync
774
775;; or
776
777define internal i32 @test_atomic_rmw_or_8(i32 %iptr, i32 %v) {
778entry:
779  %trunc = trunc i32 %v to i8
780  %ptr = inttoptr i32 %iptr to i8*
781  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
782  %a_ext = zext i8 %a to i32
783  ret i32 %a_ext
784}
785; CHECK-LABEL: test_atomic_rmw_or_8
786; CHECK: mov al,BYTE PTR
; Dest cannot be eax here, because eax is used for the old value. We also want
; to make sure that cmpxchg's source is the same register.
789; CHECK: or [[REG:[^a].]]
790; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]]
791; CHECK: jne
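; The or/and/xor rmw forms expand to a compare-exchange loop, roughly
; (register choices are illustrative):
;   mov al,BYTE PTR [ptr]
; retry:
;   mov dl,al
;   or dl,<v>
;   lock cmpxchg BYTE PTR [ptr],dl ; on failure al is reloaded with [ptr]
;   jne retry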
792; ARM32-LABEL: test_atomic_rmw_or_8
793; ARM32: dmb
794; ARM32: ldrexb
795; ARM32: orr
796; ARM32: strexb
797; ARM32: bne
798; ARM32: dmb
799; MIPS32-LABEL: test_atomic_rmw_or_8
800; MIPS32: sync
801; MIPS32: addiu	{{.*}}, $zero, -4
802; MIPS32: and
803; MIPS32: andi	{{.*}}, {{.*}}, 3
804; MIPS32: sll	{{.*}}, {{.*}}, 3
805; MIPS32: ori	{{.*}}, $zero, 255
806; MIPS32: sllv
807; MIPS32: nor
808; MIPS32: sllv
809; MIPS32: ll
810; MIPS32: or
811; MIPS32: and
812; MIPS32: and
813; MIPS32: or
814; MIPS32: sc
815; MIPS32: beq	{{.*}}, $zero, {{.*}}
816; MIPS32: and
817; MIPS32: srlv
818; MIPS32: sll	{{.*}}, {{.*}}, 24
819; MIPS32: sra	{{.*}}, {{.*}}, 24
820; MIPS32: sync
821
822; Same test as above, but with a global address to test FakeUse issues.
823define internal i32 @test_atomic_rmw_or_8_global(i32 %v) {
824entry:
825  %trunc = trunc i32 %v to i8
826  %ptr = bitcast [1 x i8]* @SzGlobal8 to i8*
827  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
828  %a_ext = zext i8 %a to i32
829  ret i32 %a_ext
830}
831; CHECK-LABEL: test_atomic_rmw_or_8_global
832; ARM32-LABEL: test_atomic_rmw_or_8_global
833; ARM32: dmb
834; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal8
835; ARM32: movt [[PTR]], #:upper16:SzGlobal8
836; ARM32: ldrexb r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
837; ARM32: orr
838; ARM32: strexb
839; ARM32: bne
840; ARM32: dmb
841; MIPS32-LABEL: test_atomic_rmw_or_8_global
842; MIPS32: sync
843; MIPS32: addiu	{{.*}}, $zero, -4
844; MIPS32: and
845; MIPS32: andi	{{.*}}, {{.*}}, 3
846; MIPS32: sll	{{.*}}, {{.*}}, 3
847; MIPS32: ori	{{.*}}, $zero, 255
848; MIPS32: sllv
849; MIPS32: nor
850; MIPS32: sllv
851; MIPS32: ll
852; MIPS32: or
853; MIPS32: and
854; MIPS32: and
855; MIPS32: or
856; MIPS32: sc
857; MIPS32: beq	{{.*}}, $zero, {{.*}}
858; MIPS32: and
859; MIPS32: srlv
860; MIPS32: sll	{{.*}}, {{.*}}, 24
861; MIPS32: sra	{{.*}}, {{.*}}, 24
862; MIPS32: sync
863
864define internal i32 @test_atomic_rmw_or_16(i32 %iptr, i32 %v) {
865entry:
866  %trunc = trunc i32 %v to i16
867  %ptr = inttoptr i32 %iptr to i16*
868  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
869  %a_ext = zext i16 %a to i32
870  ret i32 %a_ext
871}
872; CHECK-LABEL: test_atomic_rmw_or_16
873; CHECK: mov ax,WORD PTR
874; CHECK: or [[REG:[^a].]]
875; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],[[REG]]
876; CHECK: jne
877; ARM32-LABEL: test_atomic_rmw_or_16
878; ARM32: dmb
879; ARM32: ldrexh
880; ARM32: orr
881; ARM32: strexh
882; ARM32: bne
883; ARM32: dmb
884; MIPS32-LABEL: test_atomic_rmw_or_16
885; MIPS32: sync
886; MIPS32: addiu	{{.*}}, $zero, -4
887; MIPS32: and
888; MIPS32: andi	{{.*}}, {{.*}}, 3
889; MIPS32: sll	{{.*}}, {{.*}}, 3
890; MIPS32: ori	{{.*}}, {{.*}}, 65535
891; MIPS32: sllv
892; MIPS32: nor
893; MIPS32: sllv
894; MIPS32: ll
895; MIPS32: or
896; MIPS32: and
897; MIPS32: and
898; MIPS32: or
899; MIPS32: sc
900; MIPS32: beq	{{.*}}, $zero, {{.*}}
901; MIPS32: and
902; MIPS32: srlv
903; MIPS32: sll	{{.*}}, {{.*}}, 16
904; MIPS32: sra	{{.*}}, {{.*}}, 16
905; MIPS32: sync
906
907; Same test as above, but with a global address to test FakeUse issues.
908define internal i32 @test_atomic_rmw_or_16_global(i32 %v) {
909entry:
910  %trunc = trunc i32 %v to i16
911  %ptr = bitcast [2 x i8]* @SzGlobal16 to i16*
912  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
913  %a_ext = zext i16 %a to i32
914  ret i32 %a_ext
915}
916; CHECK-LABEL: test_atomic_rmw_or_16_global
917; ARM32-LABEL: test_atomic_rmw_or_16_global
918; ARM32: dmb
919; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal16
920; ARM32: movt [[PTR]], #:upper16:SzGlobal16
921; ARM32: ldrexh r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
922; ARM32: orr
923; ARM32: strexh
924; ARM32: bne
925; ARM32: dmb
926; MIPS32-LABEL: test_atomic_rmw_or_16_global
927; MIPS32: sync
928; MIPS32: addiu	{{.*}}, $zero, -4
929; MIPS32: and
930; MIPS32: andi	{{.*}}, {{.*}}, 3
931; MIPS32: sll	{{.*}}, {{.*}}, 3
932; MIPS32: ori	{{.*}}, {{.*}}, 65535
933; MIPS32: sllv
934; MIPS32: nor
935; MIPS32: sllv
936; MIPS32: ll
937; MIPS32: or
938; MIPS32: and
939; MIPS32: and
940; MIPS32: or
941; MIPS32: sc
942; MIPS32: beq	{{.*}}, $zero, {{.*}}
943; MIPS32: and
944; MIPS32: srlv
945; MIPS32: sll	{{.*}}, {{.*}}, 16
946; MIPS32: sra	{{.*}}, {{.*}}, 16
947; MIPS32: sync
948
949define internal i32 @test_atomic_rmw_or_32(i32 %iptr, i32 %v) {
950entry:
951  %ptr = inttoptr i32 %iptr to i32*
952  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
953  ret i32 %a
954}
955; CHECK-LABEL: test_atomic_rmw_or_32
956; CHECK: mov eax,DWORD PTR
957; CHECK: or [[REG:e[^a].]]
958; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],[[REG]]
959; CHECK: jne
960; ARM32-LABEL: test_atomic_rmw_or_32
961; ARM32: dmb
962; ARM32: ldrex
963; ARM32: orr
964; ARM32: strex
965; ARM32: bne
966; ARM32: dmb
967; MIPS32-LABEL: test_atomic_rmw_or_32
968; MIPS32: sync
969; MIPS32: ll
970; MIPS32: or
971; MIPS32: sc
972; MIPS32: beq	{{.*}}, $zero, {{.*}}
973; MIPS32: sync
974
975; Same test as above, but with a global address to test FakeUse issues.
976define internal i32 @test_atomic_rmw_or_32_global(i32 %v) {
977entry:
978  %ptr = bitcast [4 x i8]* @SzGlobal32 to i32*
979  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
980  ret i32 %a
981}
982; CHECK-LABEL: test_atomic_rmw_or_32_global
983; ARM32-LABEL: test_atomic_rmw_or_32_global
984; ARM32: dmb
985; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal32
986; ARM32: movt [[PTR]], #:upper16:SzGlobal32
987; ARM32: ldrex r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
988; ARM32: orr
989; ARM32: strex
990; ARM32: bne
991; ARM32: dmb
992; MIPS32-LABEL: test_atomic_rmw_or_32_global
993; MIPS32: sync
994; MIPS32: ll
995; MIPS32: or
996; MIPS32: sc
997; MIPS32: beq	{{.*}}, $zero, {{.*}}
998; MIPS32: sync
999
1000define internal i64 @test_atomic_rmw_or_64(i32 %iptr, i64 %v) {
1001entry:
1002  %ptr = inttoptr i32 %iptr to i64*
1003  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %v, i32 6)
1004  ret i64 %a
1005}
1006; CHECK-LABEL: test_atomic_rmw_or_64
1007; CHECK: push ebx
1008; CHECK: mov eax,DWORD PTR [{{.*}}]
1009; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
1010; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
1011; CHECK: or ebx,{{.*e.[^x]}}
1012; CHECK: mov ecx,edx
1013; CHECK: or ecx,{{.*e.[^x]}}
1014; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
1015; CHECK: jne [[LABEL]]
1016; ARM32-LABEL: test_atomic_rmw_or_64
1017; ARM32: dmb
1018; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
1019; ARM32: orr
1020; ARM32: orr
1021; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
1022; ARM32: bne
1023; ARM32: dmb
1024; MIPS32-LABEL: test_atomic_rmw_or_64
1025; MIPS32: sync
1026; MIPS32: jal	__sync_fetch_and_or_8
1027; MIPS32: sync
1028
1029define internal i32 @test_atomic_rmw_or_32_ignored(i32 %iptr, i32 %v) {
1030entry:
1031  %ptr = inttoptr i32 %iptr to i32*
1032  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
1033  ret i32 %v
1034}
1035; CHECK-LABEL: test_atomic_rmw_or_32_ignored
; Could just use "lock or" if we inspect the liveness information first.
; We would also need a way to add a "lock" prefix to binary operators without
; introducing overhead on the more common (non-atomic) binary ops.
1039; CHECK: mov eax,DWORD PTR
1040; CHECK: or [[REG:e[^a].]]
1041; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],[[REG]]
1042; CHECK: jne
1043; ARM32-LABEL: test_atomic_rmw_or_32_ignored
1044; ARM32: dmb
1045; ARM32: ldrex
1046; ARM32: orr
1047; ARM32: strex
1048; ARM32: bne
1049; ARM32: dmb
1050; MIPS32-LABEL: test_atomic_rmw_or_32_ignored
1051; MIPS32: sync
1052; MIPS32: ll
1053; MIPS32: or
1054; MIPS32: sc
1055; MIPS32: beq	{{.*}}, $zero, {{.*}}
1056; MIPS32: sync
1057
1058;; and
1059
1060define internal i32 @test_atomic_rmw_and_8(i32 %iptr, i32 %v) {
1061entry:
1062  %trunc = trunc i32 %v to i8
1063  %ptr = inttoptr i32 %iptr to i8*
1064  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %trunc, i32 6)
1065  %a_ext = zext i8 %a to i32
1066  ret i32 %a_ext
1067}
1068; CHECK-LABEL: test_atomic_rmw_and_8
1069; CHECK: mov al,BYTE PTR
1070; CHECK: and [[REG:[^a].]]
1071; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]]
1072; CHECK: jne
1073; ARM32-LABEL: test_atomic_rmw_and_8
1074; ARM32: dmb
1075; ARM32: ldrexb
1076; ARM32: and
1077; ARM32: strexb
1078; ARM32: bne
1079; ARM32: dmb
1080; MIPS32-LABEL: test_atomic_rmw_and_8
1081; MIPS32: sync
1082; MIPS32: addiu	{{.*}}, $zero, -4
1083; MIPS32: and
1084; MIPS32: andi	{{.*}}, {{.*}}, 3
1085; MIPS32: sll	{{.*}}, {{.*}}, 3
1086; MIPS32: ori	{{.*}}, $zero, 255
1087; MIPS32: sllv
1088; MIPS32: nor
1089; MIPS32: sllv
1090; MIPS32: ll
1091; MIPS32: and
1092; MIPS32: and
1093; MIPS32: and
1094; MIPS32: or
1095; MIPS32: sc
1096; MIPS32: beq	{{.*}}, $zero, {{.*}}
1097; MIPS32: and
1098; MIPS32: srlv
1099; MIPS32: sll	{{.*}}, {{.*}}, 24
1100; MIPS32: sra	{{.*}}, {{.*}}, 24
1101; MIPS32: sync
1102
1103define internal i32 @test_atomic_rmw_and_16(i32 %iptr, i32 %v) {
1104entry:
1105  %trunc = trunc i32 %v to i16
1106  %ptr = inttoptr i32 %iptr to i16*
1107  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %trunc, i32 6)
1108  %a_ext = zext i16 %a to i32
1109  ret i32 %a_ext
1110}
1111; CHECK-LABEL: test_atomic_rmw_and_16
1112; CHECK: mov ax,WORD PTR
1113; CHECK: and
1114; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}]
1115; CHECK: jne
1116; ARM32-LABEL: test_atomic_rmw_and_16
1117; ARM32: dmb
1118; ARM32: ldrexh
1119; ARM32: and
1120; ARM32: strexh
1121; ARM32: bne
1122; ARM32: dmb
1123; MIPS32-LABEL: test_atomic_rmw_and_16
1124; MIPS32: sync
1125; MIPS32: addiu	{{.*}}, $zero, -4
1126; MIPS32: and
1127; MIPS32: andi	{{.*}}, {{.*}}, 3
1128; MIPS32: sll	{{.*}}, {{.*}}, 3
1129; MIPS32: ori	{{.*}}, {{.*}}, 65535
1130; MIPS32: sllv
1131; MIPS32: nor
1132; MIPS32: sllv
1133; MIPS32: ll
1134; MIPS32: and
1135; MIPS32: and
1136; MIPS32: and
1137; MIPS32: or
1138; MIPS32: sc
1139; MIPS32: beq	{{.*}}, $zero, {{.*}}
1140; MIPS32: and
1141; MIPS32: srlv
1142; MIPS32: sll	{{.*}}, {{.*}}, 16
1143; MIPS32: sra	{{.*}}, {{.*}}, 16
1144; MIPS32: sync
1145
1146define internal i32 @test_atomic_rmw_and_32(i32 %iptr, i32 %v) {
1147entry:
1148  %ptr = inttoptr i32 %iptr to i32*
1149  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
1150  ret i32 %a
1151}
1152; CHECK-LABEL: test_atomic_rmw_and_32
1153; CHECK: mov eax,DWORD PTR
1154; CHECK: and
1155; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
1156; CHECK: jne
1157; ARM32-LABEL: test_atomic_rmw_and_32
1158; ARM32: dmb
1159; ARM32: ldrex
1160; ARM32: and
1161; ARM32: strex
1162; ARM32: bne
1163; ARM32: dmb
1164; MIPS32-LABEL: test_atomic_rmw_and_32
1165; MIPS32: sync
1166; MIPS32: ll
1167; MIPS32: and
1168; MIPS32: sc
1169; MIPS32: beq	{{.*}}, $zero, {{.*}}
1170; MIPS32: sync
1171
1172define internal i64 @test_atomic_rmw_and_64(i32 %iptr, i64 %v) {
1173entry:
1174  %ptr = inttoptr i32 %iptr to i64*
1175  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %v, i32 6)
1176  ret i64 %a
1177}
1178; CHECK-LABEL: test_atomic_rmw_and_64
1179; CHECK: push ebx
1180; CHECK: mov eax,DWORD PTR [{{.*}}]
1181; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
1182; CHECK: [[LABEL:[^ ]*]]: {{.*}} mov ebx,eax
1183; CHECK: and ebx,{{.*e.[^x]}}
1184; CHECK: mov ecx,edx
1185; CHECK: and ecx,{{.*e.[^x]}}
1186; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
1187; CHECK: jne [[LABEL]]
1188; ARM32-LABEL: test_atomic_rmw_and_64
1189; ARM32: dmb
1190; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
1191; ARM32: and
1192; ARM32: and
1193; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
1194; ARM32: bne
1195; ARM32: dmb
1196; MIPS32-LABEL: test_atomic_rmw_and_64
1197; MIPS32: sync
1198; MIPS32: jal	__sync_fetch_and_and_8
1199; MIPS32: sync
1200
1201define internal i32 @test_atomic_rmw_and_32_ignored(i32 %iptr, i32 %v) {
1202entry:
1203  %ptr = inttoptr i32 %iptr to i32*
1204  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %v, i32 6)
1205  ret i32 %v
1206}
1207; CHECK-LABEL: test_atomic_rmw_and_32_ignored
1208; Could just "lock and"
1209; CHECK: mov eax,DWORD PTR
1210; CHECK: and
1211; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
1212; CHECK: jne
1213; ARM32-LABEL: test_atomic_rmw_and_32_ignored
1214; ARM32: dmb
1215; ARM32: ldrex
1216; ARM32: and
1217; ARM32: strex
1218; ARM32: bne
1219; ARM32: dmb
1220; MIPS32-LABEL: test_atomic_rmw_and_32_ignored
1221; MIPS32: sync
1222; MIPS32: ll
1223; MIPS32: and
1224; MIPS32: sc
1225; MIPS32: beq	{{.*}}, $zero, {{.*}}
1226; MIPS32: sync
1227
1228;; xor
1229
1230define internal i32 @test_atomic_rmw_xor_8(i32 %iptr, i32 %v) {
1231entry:
1232  %trunc = trunc i32 %v to i8
1233  %ptr = inttoptr i32 %iptr to i8*
1234  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8* %ptr, i8 %trunc, i32 6)
1235  %a_ext = zext i8 %a to i32
1236  ret i32 %a_ext
1237}
1238; CHECK-LABEL: test_atomic_rmw_xor_8
1239; CHECK: mov al,BYTE PTR
1240; CHECK: xor [[REG:[^a].]]
1241; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],[[REG]]
1242; CHECK: jne
1243; ARM32-LABEL: test_atomic_rmw_xor_8
1244; ARM32: dmb
1245; ARM32: ldrexb
1246; ARM32: eor
1247; ARM32: strexb
1248; ARM32: bne
1249; ARM32: dmb
1250; MIPS32-LABEL: test_atomic_rmw_xor_8
1251; MIPS32: sync
1252; MIPS32: addiu	{{.*}}, $zero, -4
1253; MIPS32: and
1254; MIPS32: andi	{{.*}}, {{.*}}, 3
1255; MIPS32: sll	{{.*}}, {{.*}}, 3
1256; MIPS32: ori	{{.*}}, $zero, 255
1257; MIPS32: sllv
1258; MIPS32: nor
1259; MIPS32: sllv
1260; MIPS32: ll
1261; MIPS32: xor
1262; MIPS32: and
1263; MIPS32: and
1264; MIPS32: or
1265; MIPS32: sc
1266; MIPS32: beq	{{.*}}, $zero, {{.*}}
1267; MIPS32: and
1268; MIPS32: srlv
1269; MIPS32: sll	{{.*}}, {{.*}}, 24
1270; MIPS32: sra	{{.*}}, {{.*}}, 24
1271; MIPS32: sync
1272
1273define internal i32 @test_atomic_rmw_xor_16(i32 %iptr, i32 %v) {
1274entry:
1275  %trunc = trunc i32 %v to i16
1276  %ptr = inttoptr i32 %iptr to i16*
1277  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %trunc, i32 6)
1278  %a_ext = zext i16 %a to i32
1279  ret i32 %a_ext
1280}
1281; CHECK-LABEL: test_atomic_rmw_xor_16
1282; CHECK: mov ax,WORD PTR
1283; CHECK: xor
1284; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}]
1285; CHECK: jne
1286; ARM32-LABEL: test_atomic_rmw_xor_16
1287; ARM32: dmb
1288; ARM32: ldrexh
1289; ARM32: eor
1290; ARM32: strexh
1291; ARM32: bne
1292; ARM32: dmb
1293; MIPS32-LABEL: test_atomic_rmw_xor_16
1294; MIPS32: sync
1295; MIPS32: addiu	{{.*}}, $zero, -4
1296; MIPS32: and
1297; MIPS32: andi	{{.*}}, {{.*}}, 3
1298; MIPS32: sll	{{.*}}, {{.*}}, 3
1299; MIPS32: ori	{{.*}}, {{.*}}, 65535
1300; MIPS32: sllv
1301; MIPS32: nor
1302; MIPS32: sllv
1303; MIPS32: ll
1304; MIPS32: xor
1305; MIPS32: and
1306; MIPS32: and
1307; MIPS32: or
1308; MIPS32: sc
1309; MIPS32: beq	{{.*}}, $zero, {{.*}}
1310; MIPS32: and
1311; MIPS32: srlv
1312; MIPS32: sll	{{.*}}, {{.*}}, 16
1313; MIPS32: sra	{{.*}}, {{.*}}, 16
1314; MIPS32: sync
1315
1316define internal i32 @test_atomic_rmw_xor_32(i32 %iptr, i32 %v) {
1317entry:
1318  %ptr = inttoptr i32 %iptr to i32*
1319  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
1320  ret i32 %a
1321}
1322; CHECK-LABEL: test_atomic_rmw_xor_32
1323; CHECK: mov eax,DWORD PTR
1324; CHECK: xor
1325; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
1326; CHECK: jne
1327; ARM32-LABEL: test_atomic_rmw_xor_32
1328; ARM32: dmb
1329; ARM32: ldrex
1330; ARM32: eor
1331; ARM32: strex
1332; ARM32: bne
1333; ARM32: dmb
1334; MIPS32-LABEL: test_atomic_rmw_xor_32
1335; MIPS32: sync
1336; MIPS32: ll
1337; MIPS32: xor
1338; MIPS32: sc
1339; MIPS32: beq	{{.*}}, $zero, {{.*}}
1340; MIPS32: sync
1341
1342define internal i64 @test_atomic_rmw_xor_64(i32 %iptr, i64 %v) {
1343entry:
1344  %ptr = inttoptr i32 %iptr to i64*
1345  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %v, i32 6)
1346  ret i64 %a
1347}
1348; CHECK-LABEL: test_atomic_rmw_xor_64
1349; CHECK: push ebx
1350; CHECK: mov eax,DWORD PTR [{{.*}}]
1351; CHECK: mov edx,DWORD PTR [{{.*}}+0x4]
1352; CHECK: mov ebx,eax
; CHECK: xor ebx,{{.*e.[^x]}}
; CHECK: mov ecx,edx
; CHECK: xor ecx,{{.*e.[^x]}}
1356; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
1357; CHECK: jne
1358; ARM32-LABEL: test_atomic_rmw_xor_64
1359; ARM32: dmb
1360; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
1361; ARM32: eor
1362; ARM32: eor
1363; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
1364; ARM32: bne
1365; ARM32: dmb
1366; MIPS32-LABEL: test_atomic_rmw_xor_64
1367; MIPS32: sync
1368; MIPS32: jal	__sync_fetch_and_xor_8
1369; MIPS32: sync
1370
1371define internal i32 @test_atomic_rmw_xor_32_ignored(i32 %iptr, i32 %v) {
1372entry:
1373  %ptr = inttoptr i32 %iptr to i32*
1374  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %v, i32 6)
1375  ret i32 %v
1376}
1377; CHECK-LABEL: test_atomic_rmw_xor_32_ignored
1378; CHECK: mov eax,DWORD PTR
1379; CHECK: xor
1380; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
1381; CHECK: jne
1382; ARM32-LABEL: test_atomic_rmw_xor_32_ignored
1383; ARM32: dmb
1384; ARM32: ldrex
1385; ARM32: eor
1386; ARM32: strex
1387; ARM32: bne
1388; ARM32: dmb
1389; MIPS32-LABEL: test_atomic_rmw_xor_32_ignored
1390; MIPS32: sync
1391; MIPS32: ll
1392; MIPS32: xor
1393; MIPS32: sc
1394; MIPS32: beq	{{.*}}, $zero, {{.*}}
1395; MIPS32: sync
1396
1397;; exchange
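; Note: xchg with a memory operand is implicitly locked on x86, so no explicit
; lock prefix or mfence is expected in the CHECK lines below.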
1398
1399define internal i32 @test_atomic_rmw_xchg_8(i32 %iptr, i32 %v) {
1400entry:
1401  %trunc = trunc i32 %v to i8
1402  %ptr = inttoptr i32 %iptr to i8*
1403  %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %trunc, i32 6)
1404  %a_ext = zext i8 %a to i32
1405  ret i32 %a_ext
1406}
1407; CHECK-LABEL: test_atomic_rmw_xchg_8
1408; CHECK: xchg BYTE PTR {{.*}},[[REG:.*]]
1409; ARM32-LABEL: test_atomic_rmw_xchg_8
1410; ARM32: dmb
1411; ARM32: ldrexb
1412; ARM32: strexb
1413; ARM32: cmp
1414; ARM32: bne
1415; ARM32: dmb
1416; MIPS32-LABEL: test_atomic_rmw_xchg_8
1417; MIPS32: sync
1418; MIPS32: addiu	{{.*}}, $zero, -4
1419; MIPS32: and
1420; MIPS32: andi	{{.*}}, {{.*}}, 3
1421; MIPS32: sll	{{.*}}, {{.*}}, 3
1422; MIPS32: ori	{{.*}}, $zero, 255
1423; MIPS32: sllv
1424; MIPS32: nor
1425; MIPS32: sllv
1426; MIPS32: ll
1427; MIPS32: and
1428; MIPS32: or
1429; MIPS32: sc
1430; MIPS32: beq	{{.*}}, $zero, {{.*}}
1431; MIPS32: and
1432; MIPS32: srlv
1433; MIPS32: sll	{{.*}}, {{.*}}, 24
1434; MIPS32: sra	{{.*}}, {{.*}}, 24
1435; MIPS32: sync
1436
1437define internal i32 @test_atomic_rmw_xchg_16(i32 %iptr, i32 %v) {
1438entry:
1439  %trunc = trunc i32 %v to i16
1440  %ptr = inttoptr i32 %iptr to i16*
1441  %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %trunc, i32 6)
1442  %a_ext = zext i16 %a to i32
1443  ret i32 %a_ext
1444}
1445; CHECK-LABEL: test_atomic_rmw_xchg_16
1446; CHECK: xchg WORD PTR {{.*}},[[REG:.*]]
1447; ARM32-LABEL: test_atomic_rmw_xchg_16
1448; ARM32: dmb
1449; ARM32: ldrexh
1450; ARM32: strexh
1451; ARM32: cmp
1452; ARM32: bne
1453; ARM32: dmb
1454; MIPS32-LABEL: test_atomic_rmw_xchg_16
1455; MIPS32: sync
1456; MIPS32: addiu	{{.*}}, $zero, -4
1457; MIPS32: and
1458; MIPS32: andi	{{.*}}, {{.*}}, 3
1459; MIPS32: sll	{{.*}}, {{.*}}, 3
1460; MIPS32: ori	{{.*}}, {{.*}}, 65535
1461; MIPS32: sllv
1462; MIPS32: nor
1463; MIPS32: sllv
1464; MIPS32: ll
1465; MIPS32: and
1466; MIPS32: or
1467; MIPS32: sc
1468; MIPS32: beq	{{.*}}, $zero, {{.*}}
1469; MIPS32: and
1470; MIPS32: srlv
1471; MIPS32: sll	{{.*}}, {{.*}}, 16
1472; MIPS32: sra	{{.*}}, {{.*}}, 16
1473; MIPS32: sync
1474
1475define internal i32 @test_atomic_rmw_xchg_32(i32 %iptr, i32 %v) {
1476entry:
1477  %ptr = inttoptr i32 %iptr to i32*
1478  %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
1479  ret i32 %a
1480}
1481; CHECK-LABEL: test_atomic_rmw_xchg_32
1482; CHECK: xchg DWORD PTR {{.*}},[[REG:.*]]
1483; ARM32-LABEL: test_atomic_rmw_xchg_32
1484; ARM32: dmb
1485; ARM32: ldrex
1486; ARM32: strex
1487; ARM32: cmp
1488; ARM32: bne
1489; ARM32: dmb
1490; MIPS32-LABEL: test_atomic_rmw_xchg_32
1491; MIPS32: sync
1492; MIPS32: ll
1493; MIPS32: move
1494; MIPS32: sc
1495; MIPS32: beq	{{.*}}, $zero, {{.*}}
1496; MIPS32: sync
1497
1498define internal i64 @test_atomic_rmw_xchg_64(i32 %iptr, i64 %v) {
1499entry:
1500  %ptr = inttoptr i32 %iptr to i64*
1501  %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %v, i32 6)
1502  ret i64 %a
1503}
1504; CHECK-LABEL: test_atomic_rmw_xchg_64
1505; CHECK: push ebx
1506; CHECK-DAG: mov edx
1507; CHECK-DAG: mov eax
1508; CHECK-DAG: mov ecx
1509; CHECK-DAG: mov ebx
1510; CHECK: lock cmpxchg8b QWORD PTR [{{e.[^x]}}
1511; CHECK: jne
1512; ARM32-LABEL: test_atomic_rmw_xchg_64
1513; ARM32: dmb
1514; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
1515; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
1516; ARM32: cmp
1517; ARM32: bne
1518; ARM32: dmb
1519; MIPS32-LABEL: test_atomic_rmw_xchg_64
1520; MIPS32: sync
1521; MIPS32: jal	__sync_lock_test_and_set_8
1522; MIPS32: sync
1523
1524define internal i32 @test_atomic_rmw_xchg_32_ignored(i32 %iptr, i32 %v) {
1525entry:
1526  %ptr = inttoptr i32 %iptr to i32*
1527  %ignored = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %v, i32 6)
1528  ret i32 %v
1529}
1530; In this case, ignoring the return value doesn't help. The xchg is
1531; used to do an atomic store.
1532; CHECK-LABEL: test_atomic_rmw_xchg_32_ignored
1533; CHECK: xchg DWORD PTR {{.*}},[[REG:.*]]
1534; ARM32-LABEL: test_atomic_rmw_xchg_32_ignored
1535; ARM32: dmb
1536; ARM32: ldrex
1537; ARM32: strex
1538; ARM32: cmp
1539; ARM32: bne
1540; ARM32: dmb
1541; MIPS32-LABEL: test_atomic_rmw_xchg_32_ignored
1542; MIPS32: sync
1543; MIPS32: ll
1544; MIPS32: move
1545; MIPS32: sc
1546; MIPS32: beq	{{.*}}, $zero, {{.*}}
1547; MIPS32: sync
1548
1549;;;; Cmpxchg
1550
1551define internal i32 @test_atomic_cmpxchg_8(i32 %iptr, i32 %expected,
1552                                           i32 %desired) {
1553entry:
1554  %trunc_exp = trunc i32 %expected to i8
1555  %trunc_des = trunc i32 %desired to i8
1556  %ptr = inttoptr i32 %iptr to i8*
1557  %old = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %trunc_exp,
1558                                              i8 %trunc_des, i32 6, i32 6)
1559  %old_ext = zext i8 %old to i32
1560  ret i32 %old_ext
1561}
1562; CHECK-LABEL: test_atomic_cmpxchg_8
1563; CHECK: mov eax,{{.*}}
; Need to check that eax isn't used as the address register or the desired
; value, since it is already used as the *expected* register.
1566; CHECK: lock cmpxchg BYTE PTR [e{{[^a].}}],{{[^a]}}l
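; (cmpxchg always compares against al/ax/eax and, on failure, writes the
; current memory value back into it, which is why the expected value must be
; in eax.)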
1567; ARM32-LABEL: test_atomic_cmpxchg_8
1568; ARM32: dmb
1569; ARM32: ldrexb [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1570; ARM32: lsl [[VV:r[0-9]+]], [[V]], #24
1571; ARM32: cmp [[VV]], {{r[0-9]+}}, lsl #24
1572; ARM32: movne [[SUCCESS:r[0-9]+]],
1573; ARM32: strexbeq [[SUCCESS]], {{r[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1574; ARM32: cmp [[SUCCESS]], #0
1575; ARM32: bne
1576; ARM32: dmb
1577; MIPS32-LABEL: test_atomic_cmpxchg_8
1578; MIPS32: addiu	{{.*}}, $zero, -4
1579; MIPS32: and
1580; MIPS32: andi	{{.*}}, {{.*}}, 3
1581; MIPS32: sll	{{.*}}, {{.*}}, 3
1582; MIPS32: ori	{{.*}}, $zero, 255
1583; MIPS32: sllv
1584; MIPS32: nor
1585; MIPS32: andi	{{.*}}, {{.*}}, 255
1586; MIPS32: sllv
1587; MIPS32: andi	{{.*}}, {{.*}}, 255
1588; MIPS32: sllv
1589; MIPS32: sync
1590; MIPS32: ll
1591; MIPS32: and
1592; MIPS32: bne
1593; MIPS32: and
1594; MIPS32: or
1595; MIPS32: sc
1596; MIPS32: beq	$zero, {{.*}}, {{.*}}
1597; MIPS32: srlv
1598; MIPS32: sll	{{.*}}, {{.*}}, 24
1599; MIPS32: sra	{{.*}}, {{.*}}, 24
1600; MIPS32: sync
1601
1602define internal i32 @test_atomic_cmpxchg_16(i32 %iptr, i32 %expected,
1603                                            i32 %desired) {
1604entry:
1605  %trunc_exp = trunc i32 %expected to i16
1606  %trunc_des = trunc i32 %desired to i16
1607  %ptr = inttoptr i32 %iptr to i16*
1608  %old = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %trunc_exp,
1609                                               i16 %trunc_des, i32 6, i32 6)
1610  %old_ext = zext i16 %old to i32
1611  ret i32 %old_ext
1612}
1613; CHECK-LABEL: test_atomic_cmpxchg_16
1614; CHECK: mov {{ax|eax}},{{.*}}
1615; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],{{[^a]}}x
1616; ARM32-LABEL: test_atomic_cmpxchg_16
1617; ARM32: dmb
1618; ARM32: ldrexh [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1619; ARM32: lsl [[VV:r[0-9]+]], [[V]], #16
1620; ARM32: cmp [[VV]], {{r[0-9]+}}, lsl #16
1621; ARM32: movne [[SUCCESS:r[0-9]+]],
1622; ARM32: strexheq [[SUCCESS]], {{r[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1623; ARM32: cmp [[SUCCESS]], #0
1624; ARM32: bne
1625; ARM32: dmb
1626; MIPS32-LABEL: test_atomic_cmpxchg_16
1627; MIPS32: addiu	{{.*}}, $zero, -4
1628; MIPS32: and
1629; MIPS32: andi	{{.*}}, {{.*}}, 3
1630; MIPS32: sll	{{.*}}, {{.*}}, 3
1631; MIPS32: ori	{{.*}}, {{.*}}, 65535
1632; MIPS32: sllv
1633; MIPS32: nor
1634; MIPS32: andi	{{.*}}, {{.*}}, 65535
1635; MIPS32: sllv
1636; MIPS32: andi	{{.*}}, {{.*}}, 65535
1637; MIPS32: sllv
1638; MIPS32: sync
1639; MIPS32: ll
1640; MIPS32: and
1641; MIPS32: bne
1642; MIPS32: and
1643; MIPS32: or
1644; MIPS32: sc
1645; MIPS32: beq	$zero, {{.*}}, {{.*}}
1646; MIPS32: srlv
1647; MIPS32: sll	{{.*}}, {{.*}}, 16
1648; MIPS32: sra	{{.*}}, {{.*}}, 16
1649; MIPS32: sync
1650
1651define internal i32 @test_atomic_cmpxchg_32(i32 %iptr, i32 %expected,
1652                                            i32 %desired) {
1653entry:
1654  %ptr = inttoptr i32 %iptr to i32*
1655  %old = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
1656                                               i32 %desired, i32 6, i32 6)
1657  ret i32 %old
1658}
1659; CHECK-LABEL: test_atomic_cmpxchg_32
1660; CHECK: mov eax,{{.*}}
1661; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}],e{{[^a]}}
1662; ARM32-LABEL: test_atomic_cmpxchg_32
1663; ARM32: dmb
1664; ARM32: ldrex [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1665; ARM32: cmp [[V]], {{r[0-9]+}}
1666; ARM32: movne [[SUCCESS:r[0-9]+]],
1667; ARM32: strexeq [[SUCCESS]], {{r[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1668; ARM32: cmp [[SUCCESS]], #0
1669; ARM32: bne
1670; ARM32: dmb
1671; MIPS32-LABEL: test_atomic_cmpxchg_32
1672; MIPS32: sync
1673; MIPS32: ll
1674; MIPS32: bne
1675; MIPS32: sc
1676; MIPS32: beq	{{.*}}, $zero, {{.*}}
1677; MIPS32: sync
1678
1679define internal i64 @test_atomic_cmpxchg_64(i32 %iptr, i64 %expected,
1680                                            i64 %desired) {
1681entry:
1682  %ptr = inttoptr i32 %iptr to i64*
1683  %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
1684                                               i64 %desired, i32 6, i32 6)
1685  ret i64 %old
1686}
1687; CHECK-LABEL: test_atomic_cmpxchg_64
1688; CHECK: push ebx
1689; CHECK-DAG: mov edx
1690; CHECK-DAG: mov eax
1691; CHECK-DAG: mov ecx
1692; CHECK-DAG: mov ebx
1693; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0]
1694; edx and eax are already the return registers, so they don't actually
1695; need to be reshuffled via movs. The next test stores the result
1696; somewhere, so in that case they do need to be mov'ed.
1697; ARM32-LABEL: test_atomic_cmpxchg_64
1698; ARM32: dmb
1699; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1700; ARM32: cmp [[V0]], {{r[0-9]+}}
1701; ARM32: cmpeq [[V1]], {{r[0-9]+}}
1702; ARM32: movne [[SUCCESS:r[0-9]+]],
1703; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1704; ARM32: cmp [[SUCCESS]], #0
1705; ARM32: bne
1706; ARM32: dmb
1707; MIPS32-LABEL: test_atomic_cmpxchg_64
1708; MIPS32: sync
1709; MIPS32: jal	__sync_val_compare_and_swap_8
1710; MIPS32: sync
1711
1712
1713define internal i64 @test_atomic_cmpxchg_64_undef(i32 %iptr, i64 %desired) {
1714entry:
1715  %ptr = inttoptr i32 %iptr to i64*
1716  %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 undef,
1717                                               i64 %desired, i32 6, i32 6)
1718  ret i64 %old
1719}
1720; CHECK-LABEL: test_atomic_cmpxchg_64_undef
1721; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0]
1722; ARM32-LABEL: test_atomic_cmpxchg_64_undef
1723; ARM32: mov r{{[0-9]+}}, #0
1724; ARM32: mov r{{[0-9]+}}, #0
1725; ARM32: dmb
1726; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1727; ARM32: cmp [[V0]], {{r[0-9]+}}
1728; ARM32: cmpeq [[V1]], {{r[0-9]+}}
1729; ARM32: movne [[SUCCESS:r[0-9]+]],
1730; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1731; ARM32: cmp [[SUCCESS]], #0
1732; ARM32: bne
1733; ARM32: dmb
1734; MIPS32-LABEL: test_atomic_cmpxchg_64_undef
1735; MIPS32: sync
1736; MIPS32: jal	__sync_val_compare_and_swap_8
1737; MIPS32: sync
1738
1739; Test a case where %old really does need to be copied out of edx:eax.
1740define internal void @test_atomic_cmpxchg_64_store(
1741    i32 %ret_iptr, i32 %iptr, i64 %expected, i64 %desired) {
1742entry:
1743  %ptr = inttoptr i32 %iptr to i64*
1744  %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
1745                                                i64 %desired, i32 6, i32 6)
1746  %__6 = inttoptr i32 %ret_iptr to i64*
1747  store i64 %old, i64* %__6, align 1
1748  ret void
1749}
1750; CHECK-LABEL: test_atomic_cmpxchg_64_store
1751; CHECK: push ebx
1752; CHECK-DAG: mov edx
1753; CHECK-DAG: mov eax
1754; CHECK-DAG: mov ecx
1755; CHECK-DAG: mov ebx
1756; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}
1757; CHECK-DAG: mov {{.*}},edx
1758; CHECK-DAG: mov {{.*}},eax
1759; ARM32-LABEL: test_atomic_cmpxchg_64_store
1760; ARM32: dmb
1761; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1762; ARM32: cmp [[V0]], {{r[0-9]+}}
1763; ARM32: cmpeq [[V1]], {{r[0-9]+}}
1764; ARM32: movne [[SUCCESS:r[0-9]+]],
1765; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1766; ARM32: cmp [[SUCCESS]], #0
1767; ARM32: bne
1768; ARM32: dmb
1769; ARM32: str
1770; ARM32: str
1771; MIPS32-LABEL: test_atomic_cmpxchg_64_store
1772; MIPS32: sync
1773; MIPS32: jal	__sync_val_compare_and_swap_8
1774; MIPS32: sync
1775
1776
1777; Test with some more register pressure. When we have an alloca, ebp is
1778; used to manage the stack frame, so it cannot be used as a register either.
1779define internal i64 @test_atomic_cmpxchg_64_alloca(i32 %iptr, i64 %expected,
1780                                                   i64 %desired) {
1781entry:
1782  br label %eblock  ; Disable alloca optimization
1783eblock:
1784  %alloca_ptr = alloca i8, i32 16, align 16
1785  %ptr = inttoptr i32 %iptr to i64*
1786  %old = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
1787                                                i64 %desired, i32 6, i32 6)
1788  store i8 0, i8* %alloca_ptr, align 1
1789  store i8 1, i8* %alloca_ptr, align 1
1790  store i8 2, i8* %alloca_ptr, align 1
1791  store i8 3, i8* %alloca_ptr, align 1
1792  %__6 = ptrtoint i8* %alloca_ptr to i32
1793  call void @use_ptr(i32 %__6)
1794  ret i64 %old
1795}
1796; CHECK-LABEL: test_atomic_cmpxchg_64_alloca
1797; CHECK: push ebx
1798; CHECK-DAG: mov edx
1799; CHECK-DAG: mov eax
1800; CHECK-DAG: mov ecx
1801; CHECK-DAG: mov ebx
1802; Ptr cannot be eax, ebx, ecx, or edx (used up for the expected and desired).
1803; It also cannot be ebp since we use that for alloca. Also make sure it's
1804; not esp, since that's the stack pointer and mucking with it will break
1805; the later use_ptr function call.
; That pretty much leaves esi or edi as the only viable registers.
1807; CHECK: lock cmpxchg8b QWORD PTR [e{{[ds]}}i]
1808; CHECK: call {{.*}} R_{{.*}} use_ptr
1809; ARM32-LABEL: test_atomic_cmpxchg_64_alloca
1810; ARM32: dmb
1811; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1812; ARM32: cmp [[V0]], {{r[0-9]+}}
1813; ARM32: cmpeq [[V1]], {{r[0-9]+}}
1814; ARM32: movne [[SUCCESS:r[0-9]+]],
1815; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1816; ARM32: cmp [[SUCCESS]], #0
1817; ARM32: bne
1818; ARM32: dmb
1819; MIPS32-LABEL: test_atomic_cmpxchg_64_alloca
1820; MIPS32: sync
1821; MIPS32: jal	__sync_val_compare_and_swap_8
1822; MIPS32: sync
1823
1824define internal i32 @test_atomic_cmpxchg_32_ignored(i32 %iptr, i32 %expected,
1825                                                    i32 %desired) {
1826entry:
1827  %ptr = inttoptr i32 %iptr to i32*
1828  %ignored = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %expected,
1829                                                    i32 %desired, i32 6, i32 6)
1830  ret i32 0
1831}
1832; CHECK-LABEL: test_atomic_cmpxchg_32_ignored
1833; CHECK: mov eax,{{.*}}
1834; CHECK: lock cmpxchg DWORD PTR [e{{[^a].}}]
1835; ARM32-LABEL: test_atomic_cmpxchg_32_ignored
1836; ARM32: dmb
1837; ARM32: ldrex [[V:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1838; ARM32: cmp [[V]], {{r[0-9]+}}
1839; ARM32: movne [[SUCCESS:r[0-9]+]],
1840; ARM32: strexeq [[SUCCESS]]
1841; ARM32: cmp [[SUCCESS]], #0
1842; ARM32: bne
1843; ARM32: dmb
1844; MIPS32-LABEL: test_atomic_cmpxchg_32_ignored
1845; MIPS32: sync
1846; MIPS32: ll
1847; MIPS32: bne
1848; MIPS32: sc
1849; MIPS32: beq	{{.*}}, $zero, {{.*}}
1850; MIPS32: sync
1851
1852define internal i64 @test_atomic_cmpxchg_64_ignored(i32 %iptr, i64 %expected,
1853                                                    i64 %desired) {
1854entry:
1855  %ptr = inttoptr i32 %iptr to i64*
1856  %ignored = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %expected,
1857                                                    i64 %desired, i32 6, i32 6)
1858  ret i64 0
1859}
1860; CHECK-LABEL: test_atomic_cmpxchg_64_ignored
1861; CHECK: push ebx
1862; CHECK-DAG: mov edx
1863; CHECK-DAG: mov eax
1864; CHECK-DAG: mov ecx
1865; CHECK-DAG: mov ebx
1866; CHECK: lock cmpxchg8b QWORD PTR [e{{.[^x]}}+0x0]
1867; ARM32-LABEL: test_atomic_cmpxchg_64_ignored
1868; ARM32: dmb
1869; ARM32: ldrexd [[V0:r[0-9]+]], [[V1:r[0-9]+]], {{[[]}}[[A:r[0-9]+]]{{[]]}}
1870; ARM32: cmp [[V0]], {{r[0-9]+}}
1871; ARM32: cmpeq [[V1]], {{r[0-9]+}}
1872; ARM32: movne [[SUCCESS:r[0-9]+]],
; ARM32: strexdeq [[SUCCESS]], r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[A]]{{[]]}}
1874; ARM32: cmp [[SUCCESS]], #0
1875; ARM32: bne
1876; ARM32: dmb
1877; MIPS32-LABEL: test_atomic_cmpxchg_64_ignored
1878; MIPS32: sync
1879; MIPS32: jal	__sync_val_compare_and_swap_8
1880; MIPS32: sync
1881
1882;;;; Fence and is-lock-free.
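; The seq_cst fence intrinsic corresponds roughly to the C11/GCC builtin
; __atomic_thread_fence(__ATOMIC_SEQ_CST); each target lowers it to its full
; barrier (mfence, dmb, or sync), as the expectations below show.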
1883
1884define internal void @test_atomic_fence() {
1885entry:
1886  call void @llvm.nacl.atomic.fence(i32 6)
1887  ret void
1888}
1889; CHECK-LABEL: test_atomic_fence
1890; CHECK: mfence
1891; ARM32-LABEL: test_atomic_fence
1892; ARM32: dmb sy
1893; MIPS32-LABEL: test_atomic_fence
1894; MIPS32: sync
1895
1896define internal void @test_atomic_fence_all() {
1897entry:
1898  call void @llvm.nacl.atomic.fence.all()
1899  ret void
1900}
1901; CHECK-LABEL: test_atomic_fence_all
1902; CHECK: mfence
1903; ARM32-LABEL: test_atomic_fence_all
1904; ARM32: dmb sy
1905; MIPS32-LABEL: test_atomic_fence_all
1906; MIPS32: sync
1907
1908define internal i32 @test_atomic_is_lock_free(i32 %iptr) {
1909entry:
1910  %ptr = inttoptr i32 %iptr to i8*
1911  %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
1912  %r = zext i1 %i to i32
1913  ret i32 %r
1914}
1915; CHECK-LABEL: test_atomic_is_lock_free
1916; CHECK: mov {{.*}},0x1
1917; ARM32-LABEL: test_atomic_is_lock_free
1918; ARM32: mov {{.*}}, #1
1919; MIPS32-LABEL: test_atomic_is_lock_free
1920; MIPS32: addiu {{.*}}, $zero, 1
1921
1922define internal i32 @test_not_lock_free(i32 %iptr) {
1923entry:
1924  %ptr = inttoptr i32 %iptr to i8*
1925  %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 7, i8* %ptr)
1926  %r = zext i1 %i to i32
1927  ret i32 %r
1928}
1929; CHECK-LABEL: test_not_lock_free
1930; CHECK: mov {{.*}},0x0
1931; ARM32-LABEL: test_not_lock_free
1932; ARM32: mov {{.*}}, #0
1933; MIPS32-LABEL: test_not_lock_free
1934; MIPS32: addiu {{.*}}, $zero, 0
1935
1936define internal i32 @test_atomic_is_lock_free_ignored(i32 %iptr) {
1937entry:
1938  %ptr = inttoptr i32 %iptr to i8*
1939  %ignored = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
1940  ret i32 0
1941}
1942; CHECK-LABEL: test_atomic_is_lock_free_ignored
1943; CHECK: mov {{.*}},0x0
; This can get optimized out because it's side-effect-free.
; O2-LABEL: test_atomic_is_lock_free_ignored
; O2-NOT: mov {{.*}},0x1
1947; O2: mov {{.*}},0x0
1948; ARM32O2-LABEL: test_atomic_is_lock_free_ignored
1949; ARM32O2-NOT: mov {{.*}}, #1
1950; ARM32O2: mov {{.*}}, #0
; MIPS32O2-LABEL: test_atomic_is_lock_free_ignored
1952; MIPS32O2-NOT: addiu {{.*}}, $zero, 1
1953; MIPS32O2: addiu {{.*}}, $zero, 0
1954
1955; TODO(jvoung): at some point we can take advantage of the
1956; fact that nacl.atomic.is.lock.free will resolve to a constant
1957; (which adds DCE opportunities). Once we optimize, the test expectations
1958; for this case should change.
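; For illustration: once the call below folds to a constant 1, %cmp becomes
; always-true, the not_lock_free block is dead, and the function could reduce
; to simply returning 1.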
1959define internal i32 @test_atomic_is_lock_free_can_dce(i32 %iptr, i32 %x,
1960                                                      i32 %y) {
1961entry:
1962  %ptr = inttoptr i32 %iptr to i8*
1963  %i = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr)
1964  %i_ext = zext i1 %i to i32
1965  %cmp = icmp eq i32 %i_ext, 1
1966  br i1 %cmp, label %lock_free, label %not_lock_free
1967lock_free:
1968  ret i32 %i_ext
1969
1970not_lock_free:
1971  %z = add i32 %x, %y
1972  ret i32 %z
1973}
1974; CHECK-LABEL: test_atomic_is_lock_free_can_dce
1975; CHECK: mov {{.*}},0x1
1976; CHECK: ret
1977; CHECK: add
1978; CHECK: ret
1979
; Test the liveness / register allocation properties of the xadd instruction.
; Make sure we model that the Src operand's register is modified, and therefore
; it can't be shared with any variable whose live range overlaps, even if the
; result of the xadd instruction is unused.
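; For reference, "lock xadd [mem], reg" is roughly:
;   tmp = *mem; *mem = tmp + reg; reg = tmp;
; so reg is overwritten with the old memory value even when that value is
; never read afterwards.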
1984define internal void @test_xadd_regalloc() {
1985entry:
1986  br label %body
1987body:
1988  %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
1989  %g = bitcast [4 x i8]* @SzGlobal32 to i32*
1990  %unused = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %g, i32 %i, i32 6)
1991  %i_plus_1 = add i32 %i, 1
1992  %cmp = icmp eq i32 %i_plus_1, 1001
1993  br i1 %cmp, label %done, label %body
1994done:
1995  ret void
1996}
1997; O2-LABEL: test_xadd_regalloc
1998;;; Some register will be used in the xadd instruction.
1999; O2: lock xadd DWORD PTR {{.*}},[[REG:e..]]
2000;;; Make sure that register isn't used again, e.g. as the induction variable.
2001; O2-NOT: ,[[REG]]
2002; O2: ret
2003
2004; Do the same test for the xchg instruction instead of xadd.
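; (xchg with a memory operand swaps the register and memory contents, and
; asserts the bus lock implicitly, so the register is likewise clobbered with
; the old memory value.)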
2005define internal void @test_xchg_regalloc() {
2006entry:
2007  br label %body
2008body:
2009  %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
2010  %g = bitcast [4 x i8]* @SzGlobal32 to i32*
2011  %unused = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %g, i32 %i, i32 6)
2012  %i_plus_1 = add i32 %i, 1
2013  %cmp = icmp eq i32 %i_plus_1, 1001
2014  br i1 %cmp, label %done, label %body
2015done:
2016  ret void
2017}
2018; O2-LABEL: test_xchg_regalloc
2019;;; Some register will be used in the xchg instruction.
2020; O2: xchg DWORD PTR {{.*}},[[REG:e..]]
2021;;; Make sure that register isn't used again, e.g. as the induction variable.
2022; O2-NOT: ,[[REG]]
2023; O2: ret
2024
2025; Same test for cmpxchg.
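; (For 32-bit cmpxchg, eax implicitly holds the expected value and receives the
; old memory value on failure, so eax stays live across the instruction and
; must not be reused as the induction variable.)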
2026define internal void @test_cmpxchg_regalloc() {
2027entry:
2028  br label %body
2029body:
2030  %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
2031  %g = bitcast [4 x i8]* @SzGlobal32 to i32*
2032  %unused = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %g, i32 %i, i32 %i, i32 6, i32 6)
2033  %i_plus_1 = add i32 %i, 1
2034  %cmp = icmp eq i32 %i_plus_1, 1001
2035  br i1 %cmp, label %done, label %body
2036done:
2037  ret void
2038}
2039; O2-LABEL: test_cmpxchg_regalloc
2040;;; eax and some other register will be used in the cmpxchg instruction.
2041; O2: lock cmpxchg DWORD PTR {{.*}},[[REG:e..]]
2042;;; Make sure eax isn't used again, e.g. as the induction variable.
2043; O2-NOT: ,eax
2044; O2: ret
2045
2046; Same test for cmpxchg8b.
2047define internal void @test_cmpxchg8b_regalloc() {
2048entry:
2049  br label %body
2050body:
2051  %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
2052  %g = bitcast [8 x i8]* @SzGlobal64 to i64*
2053  %i_64 = zext i32 %i to i64
2054  %unused = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %g, i64 %i_64, i64 %i_64, i32 6, i32 6)
2055  %i_plus_1 = add i32 %i, 1
2056  %cmp = icmp eq i32 %i_plus_1, 1001
2057  br i1 %cmp, label %done, label %body
2058done:
2059  ret void
2060}
2061; O2-LABEL: test_cmpxchg8b_regalloc
;;; eax, ebx, ecx, and edx are all implicitly used by the cmpxchg8b instruction.
2063; O2: lock cmpxchg8b QWORD PTR
2064;;; Make sure eax/ecx/edx/ebx aren't used again, e.g. as the induction variable.
2065; O2-NOT: ,{{eax|ecx|edx|ebx}}
2066; O2: pop ebx
2067