; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix FAST_INC
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix SLOW_INC

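; To regenerate the CHECK lines in this file after a codegen change, rerun the
; script named in the NOTE above from an LLVM checkout. An illustrative
; invocation (paths are placeholders; adjust to your build):
;   llvm/utils/update_llc_test_checks.py --llc-binary <path-to-llc> <this-file>
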
; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in a single mov instruction rather than two. More precisely, it makes
; sure that the immediate is not first copied uselessly into a register.

; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back into that variable is done as a single
; instruction. For example:
;   x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register,
; doing an add and storing the result back (see the sketch after this header).
; The binary operations currently supported are add, and, or, xor.
; sub with an immediate is not covered because it is canonicalized into an
; addition of the negated immediate.
;
; We also check the same patterns:
; - For inc/dec.
; - For register instead of immediate operands.
; - For floating point operations.

; seq_cst stores are left as (implicitly locked) xchgl, but we try to check
; every other ordering attribute at least once.

; Note that these operations do not require the lock prefix: only sequentially
; consistent stores need this kind of protection on X86. Even for seq_cst
; operations, LLVM uses the xchg instruction, which has an implicit lock
; prefix, so making it explicit is not required.

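; As an illustrative sketch (not part of the checked output), the folding
; described above turns IR like
;   %old = load atomic i32, i32* %x acquire, align 4
;   %new = add i32 %old, 2
;   store atomic i32 %new, i32* %x release, align 4
; into a single memory-operand instruction such as
;   addl $2, (%rdi)
; instead of a movl (load) / addl / movl (store) sequence. The seq_cst store
; variants below instead go through an implicitly locked exchange, e.g.
;   movl $42, %eax
;   xchgl %eax, (%rdi)
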
32define void @store_atomic_imm_8(i8* %p) {
33; X64-LABEL: store_atomic_imm_8:
34; X64:       # %bb.0:
35; X64-NEXT:    movb $42, (%rdi)
36; X64-NEXT:    retq
37;
38; X32-LABEL: store_atomic_imm_8:
39; X32:       # %bb.0:
40; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
41; X32-NEXT:    movb $42, (%eax)
42; X32-NEXT:    retl
43  store atomic i8 42, i8* %p release, align 1
44  ret void
45}
46
47define void @store_atomic_imm_16(i16* %p) {
48; X64-LABEL: store_atomic_imm_16:
49; X64:       # %bb.0:
50; X64-NEXT:    movw $42, (%rdi)
51; X64-NEXT:    retq
52;
53; X32-LABEL: store_atomic_imm_16:
54; X32:       # %bb.0:
55; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
56; X32-NEXT:    movw $42, (%eax)
57; X32-NEXT:    retl
58  store atomic i16 42, i16* %p monotonic, align 2
59  ret void
60}
61
62define void @store_atomic_imm_32(i32* %p) {
63; X64-LABEL: store_atomic_imm_32:
64; X64:       # %bb.0:
65; X64-NEXT:    movl $42, (%rdi)
66; X64-NEXT:    retq
67;
68; X32-LABEL: store_atomic_imm_32:
69; X32:       # %bb.0:
70; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
71; X32-NEXT:    movl $42, (%eax)
72; X32-NEXT:    retl
;   On 32-bit targets, there is an extra movl for each of these functions
;   because the pointer argument is passed on the stack and must first be
;   loaded into a register.
75  store atomic i32 42, i32* %p release, align 4
76  ret void
77}
78
79define void @store_atomic_imm_64(i64* %p) {
80; X64-LABEL: store_atomic_imm_64:
81; X64:       # %bb.0:
82; X64-NEXT:    movq $42, (%rdi)
83; X64-NEXT:    retq
84;
85; X32-LABEL: store_atomic_imm_64:
86; X32:       # %bb.0:
87; X32-NEXT:    pushl %ebp
88; X32-NEXT:    .cfi_def_cfa_offset 8
89; X32-NEXT:    .cfi_offset %ebp, -8
90; X32-NEXT:    movl %esp, %ebp
91; X32-NEXT:    .cfi_def_cfa_register %ebp
92; X32-NEXT:    andl $-8, %esp
93; X32-NEXT:    subl $8, %esp
94; X32-NEXT:    movl 8(%ebp), %eax
95; X32-NEXT:    movl $0, {{[0-9]+}}(%esp)
96; X32-NEXT:    movl $42, (%esp)
97; X32-NEXT:    fildll (%esp)
98; X32-NEXT:    fistpll (%eax)
99; X32-NEXT:    movl %ebp, %esp
100; X32-NEXT:    popl %ebp
101; X32-NEXT:    .cfi_def_cfa %esp, 4
102; X32-NEXT:    retl
;   On 32-bit targets, 64-bit atomic stores cannot be done with a single mov;
;   they are lowered to an x87 fildll/fistpll sequence (or a cmpxchg8b loop),
;   and thus cannot be optimized in the same way as the others.
105  store atomic i64 42, i64* %p release, align 8
106  ret void
107}
108
; If an immediate is too big to fit in 32 bits, it cannot be stored with a
; single mov: even on X64 one must use movabsq, which can only target a
; register.
111define void @store_atomic_imm_64_big(i64* %p) {
112; X64-LABEL: store_atomic_imm_64_big:
113; X64:       # %bb.0:
114; X64-NEXT:    movabsq $100000000000, %rax # imm = 0x174876E800
115; X64-NEXT:    movq %rax, (%rdi)
116; X64-NEXT:    retq
117;
118; X32-LABEL: store_atomic_imm_64_big:
119; X32:       # %bb.0:
120; X32-NEXT:    pushl %ebp
121; X32-NEXT:    .cfi_def_cfa_offset 8
122; X32-NEXT:    .cfi_offset %ebp, -8
123; X32-NEXT:    movl %esp, %ebp
124; X32-NEXT:    .cfi_def_cfa_register %ebp
125; X32-NEXT:    andl $-8, %esp
126; X32-NEXT:    subl $8, %esp
127; X32-NEXT:    movl 8(%ebp), %eax
128; X32-NEXT:    movl $23, {{[0-9]+}}(%esp)
129; X32-NEXT:    movl $1215752192, (%esp) # imm = 0x4876E800
130; X32-NEXT:    fildll (%esp)
131; X32-NEXT:    fistpll (%eax)
132; X32-NEXT:    movl %ebp, %esp
133; X32-NEXT:    popl %ebp
134; X32-NEXT:    .cfi_def_cfa %esp, 4
135; X32-NEXT:    retl
136  store atomic i64 100000000000, i64* %p monotonic, align 8
137  ret void
138}
139
140; It would be incorrect to replace a lock xchgl by a movl
141define void @store_atomic_imm_32_seq_cst(i32* %p) {
142; X64-LABEL: store_atomic_imm_32_seq_cst:
143; X64:       # %bb.0:
144; X64-NEXT:    movl $42, %eax
145; X64-NEXT:    xchgl %eax, (%rdi)
146; X64-NEXT:    retq
147;
148; X32-LABEL: store_atomic_imm_32_seq_cst:
149; X32:       # %bb.0:
150; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
151; X32-NEXT:    movl $42, %ecx
152; X32-NEXT:    xchgl %ecx, (%eax)
153; X32-NEXT:    retl
154  store atomic i32 42, i32* %p seq_cst, align 4
155  ret void
156}
157
158; ----- ADD -----
159
160define void @add_8i(i8* %p) {
161; X64-LABEL: add_8i:
162; X64:       # %bb.0:
163; X64-NEXT:    addb $2, (%rdi)
164; X64-NEXT:    retq
165;
166; X32-LABEL: add_8i:
167; X32:       # %bb.0:
168; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
169; X32-NEXT:    addb $2, (%eax)
170; X32-NEXT:    retl
171  %1 = load atomic i8, i8* %p seq_cst, align 1
172  %2 = add i8 %1, 2
173  store atomic i8 %2, i8* %p release, align 1
174  ret void
175}
176
177define void @add_8r(i8* %p, i8 %v) {
178; X64-LABEL: add_8r:
179; X64:       # %bb.0:
180; X64-NEXT:    addb %sil, (%rdi)
181; X64-NEXT:    retq
182;
183; X32-LABEL: add_8r:
184; X32:       # %bb.0:
185; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
186; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
187; X32-NEXT:    addb %al, (%ecx)
188; X32-NEXT:    retl
189  %1 = load atomic i8, i8* %p seq_cst, align 1
190  %2 = add i8 %1, %v
191  store atomic i8 %2, i8* %p release, align 1
192  ret void
193}
194
195define void @add_16i(i16* %p) {
196; X64-LABEL: add_16i:
197; X64:       # %bb.0:
198; X64-NEXT:    addw $2, (%rdi)
199; X64-NEXT:    retq
200;
201; X32-LABEL: add_16i:
202; X32:       # %bb.0:
203; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
204; X32-NEXT:    addw $2, (%eax)
205; X32-NEXT:    retl
206  %1 = load atomic i16, i16* %p acquire, align 2
207  %2 = add i16 %1, 2
208  store atomic i16 %2, i16* %p release, align 2
209  ret void
210}
211
212define void @add_16r(i16* %p, i16 %v) {
213; X64-LABEL: add_16r:
214; X64:       # %bb.0:
215; X64-NEXT:    addw %si, (%rdi)
216; X64-NEXT:    retq
217;
218; X32-LABEL: add_16r:
219; X32:       # %bb.0:
220; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
221; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
222; X32-NEXT:    addw %ax, (%ecx)
223; X32-NEXT:    retl
224  %1 = load atomic i16, i16* %p acquire, align 2
225  %2 = add i16 %1, %v
226  store atomic i16 %2, i16* %p release, align 2
227  ret void
228}
229
230define void @add_32i(i32* %p) {
231; X64-LABEL: add_32i:
232; X64:       # %bb.0:
233; X64-NEXT:    addl $2, (%rdi)
234; X64-NEXT:    retq
235;
236; X32-LABEL: add_32i:
237; X32:       # %bb.0:
238; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
239; X32-NEXT:    addl $2, (%eax)
240; X32-NEXT:    retl
241  %1 = load atomic i32, i32* %p acquire, align 4
242  %2 = add i32 %1, 2
243  store atomic i32 %2, i32* %p monotonic, align 4
244  ret void
245}
246
247define void @add_32r(i32* %p, i32 %v) {
248; X64-LABEL: add_32r:
249; X64:       # %bb.0:
250; X64-NEXT:    addl %esi, (%rdi)
251; X64-NEXT:    retq
252;
253; X32-LABEL: add_32r:
254; X32:       # %bb.0:
255; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
256; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
257; X32-NEXT:    addl %eax, (%ecx)
258; X32-NEXT:    retl
259  %1 = load atomic i32, i32* %p acquire, align 4
260  %2 = add i32 %1, %v
261  store atomic i32 %2, i32* %p monotonic, align 4
262  ret void
263}
264
; The following is a corner case where the load is added to itself. The pattern
; matching must not fold this into a memory RMW. We only test with 32-bit add,
; but the same applies to other sizes and operations.
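; Illustrative (hypothetical) interleaving showing why such a fold would be
; wrong: with *%p initially 1, the code below loads 1 once and stores 1 + 1 = 2.
; A folded form like "addl %reg, (%rdi)" would read %p a second time inside the
; RMW; if another thread stored 5 in between, the result would be 1 + 5 = 6.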
268define void @add_32r_self(i32* %p) {
269; X64-LABEL: add_32r_self:
270; X64:       # %bb.0:
271; X64-NEXT:    movl (%rdi), %eax
272; X64-NEXT:    addl %eax, %eax
273; X64-NEXT:    movl %eax, (%rdi)
274; X64-NEXT:    retq
275;
276; X32-LABEL: add_32r_self:
277; X32:       # %bb.0:
278; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
279; X32-NEXT:    movl (%eax), %ecx
280; X32-NEXT:    addl %ecx, %ecx
281; X32-NEXT:    movl %ecx, (%eax)
282; X32-NEXT:    retl
283  %1 = load atomic i32, i32* %p acquire, align 4
284  %2 = add i32 %1, %1
285  store atomic i32 %2, i32* %p monotonic, align 4
286  ret void
287}
288
289; The following is a corner case where the load's result is returned. The
290; optimizer isn't allowed to duplicate the load because it's atomic.
291define i32 @add_32r_ret_load(i32* %p, i32 %v) {
292; X64-LABEL: add_32r_ret_load:
293; X64:       # %bb.0:
294; X64-NEXT:    movl (%rdi), %eax
295; X64-NEXT:    addl %eax, %esi
296; X64-NEXT:    movl %esi, (%rdi)
297; X64-NEXT:    retq
298;
299; X32-LABEL: add_32r_ret_load:
300; X32:       # %bb.0:
301; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
302; X32-NEXT:    movl (%ecx), %eax
303; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
304; X32-NEXT:    addl %eax, %edx
305; X32-NEXT:    movl %edx, (%ecx)
306; X32-NEXT:    retl
; There is more code here; we just don't want it to load from %p again.
308  %1 = load atomic i32, i32* %p acquire, align 4
309  %2 = add i32 %1, %v
310  store atomic i32 %2, i32* %p monotonic, align 4
311  ret i32 %1
312}
313
314define void @add_64i(i64* %p) {
315; X64-LABEL: add_64i:
316; X64:       # %bb.0:
317; X64-NEXT:    addq $2, (%rdi)
318; X64-NEXT:    retq
319;
320; X32-LABEL: add_64i:
321; X32:       # %bb.0:
322; X32-NEXT:    pushl %ebp
323; X32-NEXT:    .cfi_def_cfa_offset 8
324; X32-NEXT:    .cfi_offset %ebp, -8
325; X32-NEXT:    movl %esp, %ebp
326; X32-NEXT:    .cfi_def_cfa_register %ebp
327; X32-NEXT:    andl $-8, %esp
328; X32-NEXT:    subl $16, %esp
329; X32-NEXT:    movl 8(%ebp), %eax
330; X32-NEXT:    fildll (%eax)
331; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
332; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
333; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
334; X32-NEXT:    addl $2, %ecx
335; X32-NEXT:    adcl $0, %edx
336; X32-NEXT:    movl %ecx, (%esp)
337; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
338; X32-NEXT:    fildll (%esp)
339; X32-NEXT:    fistpll (%eax)
340; X32-NEXT:    movl %ebp, %esp
341; X32-NEXT:    popl %ebp
342; X32-NEXT:    .cfi_def_cfa %esp, 4
343; X32-NEXT:    retl
344;   We do not check X86-32 as it cannot do 'addq'.
345  %1 = load atomic i64, i64* %p acquire, align 8
346  %2 = add i64 %1, 2
347  store atomic i64 %2, i64* %p release, align 8
348  ret void
349}
350
351define void @add_64r(i64* %p, i64 %v) {
352; X64-LABEL: add_64r:
353; X64:       # %bb.0:
354; X64-NEXT:    addq %rsi, (%rdi)
355; X64-NEXT:    retq
356;
357; X32-LABEL: add_64r:
358; X32:       # %bb.0:
359; X32-NEXT:    pushl %ebp
360; X32-NEXT:    .cfi_def_cfa_offset 8
361; X32-NEXT:    .cfi_offset %ebp, -8
362; X32-NEXT:    movl %esp, %ebp
363; X32-NEXT:    .cfi_def_cfa_register %ebp
364; X32-NEXT:    andl $-8, %esp
365; X32-NEXT:    subl $16, %esp
366; X32-NEXT:    movl 8(%ebp), %eax
367; X32-NEXT:    fildll (%eax)
368; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
369; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
370; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
371; X32-NEXT:    addl 12(%ebp), %ecx
372; X32-NEXT:    adcl 16(%ebp), %edx
373; X32-NEXT:    movl %ecx, (%esp)
374; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
375; X32-NEXT:    fildll (%esp)
376; X32-NEXT:    fistpll (%eax)
377; X32-NEXT:    movl %ebp, %esp
378; X32-NEXT:    popl %ebp
379; X32-NEXT:    .cfi_def_cfa %esp, 4
380; X32-NEXT:    retl
381;   We do not check X86-32 as it cannot do 'addq'.
382  %1 = load atomic i64, i64* %p acquire, align 8
383  %2 = add i64 %1, %v
384  store atomic i64 %2, i64* %p release, align 8
385  ret void
386}
387
388define void @add_32i_seq_cst(i32* %p) {
389; X64-LABEL: add_32i_seq_cst:
390; X64:       # %bb.0:
391; X64-NEXT:    movl (%rdi), %eax
392; X64-NEXT:    addl $2, %eax
393; X64-NEXT:    xchgl %eax, (%rdi)
394; X64-NEXT:    retq
395;
396; X32-LABEL: add_32i_seq_cst:
397; X32:       # %bb.0:
398; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
399; X32-NEXT:    movl (%eax), %ecx
400; X32-NEXT:    addl $2, %ecx
401; X32-NEXT:    xchgl %ecx, (%eax)
402; X32-NEXT:    retl
403  %1 = load atomic i32, i32* %p monotonic, align 4
404  %2 = add i32 %1, 2
405  store atomic i32 %2, i32* %p seq_cst, align 4
406  ret void
407}
408
409define void @add_32r_seq_cst(i32* %p, i32 %v) {
410; X64-LABEL: add_32r_seq_cst:
411; X64:       # %bb.0:
412; X64-NEXT:    movl (%rdi), %eax
413; X64-NEXT:    addl %esi, %eax
414; X64-NEXT:    xchgl %eax, (%rdi)
415; X64-NEXT:    retq
416;
417; X32-LABEL: add_32r_seq_cst:
418; X32:       # %bb.0:
419; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
420; X32-NEXT:    movl (%eax), %ecx
421; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
422; X32-NEXT:    xchgl %ecx, (%eax)
423; X32-NEXT:    retl
424  %1 = load atomic i32, i32* %p monotonic, align 4
425  %2 = add i32 %1, %v
426  store atomic i32 %2, i32* %p seq_cst, align 4
427  ret void
428}
429
430; ----- SUB -----
431
432define void @sub_8r(i8* %p, i8 %v) {
433; X64-LABEL: sub_8r:
434; X64:       # %bb.0:
435; X64-NEXT:    subb %sil, (%rdi)
436; X64-NEXT:    retq
437;
438; X32-LABEL: sub_8r:
439; X32:       # %bb.0:
440; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
441; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
442; X32-NEXT:    subb %al, (%ecx)
443; X32-NEXT:    retl
444  %1 = load atomic i8, i8* %p seq_cst, align 1
445  %2 = sub i8 %1, %v
446  store atomic i8 %2, i8* %p release, align 1
447  ret void
448}
449
450define void @sub_16r(i16* %p, i16 %v) {
451; X64-LABEL: sub_16r:
452; X64:       # %bb.0:
453; X64-NEXT:    subw %si, (%rdi)
454; X64-NEXT:    retq
455;
456; X32-LABEL: sub_16r:
457; X32:       # %bb.0:
458; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
459; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
460; X32-NEXT:    subw %ax, (%ecx)
461; X32-NEXT:    retl
462  %1 = load atomic i16, i16* %p acquire, align 2
463  %2 = sub i16 %1, %v
464  store atomic i16 %2, i16* %p release, align 2
465  ret void
466}
467
468define void @sub_32r(i32* %p, i32 %v) {
469; X64-LABEL: sub_32r:
470; X64:       # %bb.0:
471; X64-NEXT:    subl %esi, (%rdi)
472; X64-NEXT:    retq
473;
474; X32-LABEL: sub_32r:
475; X32:       # %bb.0:
476; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
477; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
478; X32-NEXT:    subl %eax, (%ecx)
479; X32-NEXT:    retl
480  %1 = load atomic i32, i32* %p acquire, align 4
481  %2 = sub i32 %1, %v
482  store atomic i32 %2, i32* %p monotonic, align 4
483  ret void
484}
485
; The following is a corner case where the load is subtracted from itself. The
; pattern matching must not fold this into a memory RMW. We only test with
; 32-bit sub, but the same applies to other sizes and operations.
489define void @sub_32r_self(i32* %p) {
490; X64-LABEL: sub_32r_self:
491; X64:       # %bb.0:
492; X64-NEXT:    movl (%rdi), %eax
493; X64-NEXT:    movl $0, (%rdi)
494; X64-NEXT:    retq
495;
496; X32-LABEL: sub_32r_self:
497; X32:       # %bb.0:
498; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
499; X32-NEXT:    movl (%eax), %ecx
500; X32-NEXT:    movl $0, (%eax)
501; X32-NEXT:    retl
502  %1 = load atomic i32, i32* %p acquire, align 4
503  %2 = sub i32 %1, %1
504  store atomic i32 %2, i32* %p monotonic, align 4
505  ret void
506}
507
508; The following is a corner case where the load's result is returned. The
509; optimizer isn't allowed to duplicate the load because it's atomic.
510define i32 @sub_32r_ret_load(i32* %p, i32 %v) {
511; X64-LABEL: sub_32r_ret_load:
512; X64:       # %bb.0:
513; X64-NEXT:    movl (%rdi), %eax
514; X64-NEXT:    movl %eax, %ecx
515; X64-NEXT:    subl %esi, %ecx
516; X64-NEXT:    movl %ecx, (%rdi)
517; X64-NEXT:    retq
518;
519; X32-LABEL: sub_32r_ret_load:
520; X32:       # %bb.0:
521; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
522; X32-NEXT:    movl (%ecx), %eax
523; X32-NEXT:    movl %eax, %edx
524; X32-NEXT:    subl {{[0-9]+}}(%esp), %edx
525; X32-NEXT:    movl %edx, (%ecx)
526; X32-NEXT:    retl
; There is more code here; we just don't want it to load from %p again.
528  %1 = load atomic i32, i32* %p acquire, align 4
529  %2 = sub i32 %1, %v
530  store atomic i32 %2, i32* %p monotonic, align 4
531  ret i32 %1
532}
533
534define void @sub_64r(i64* %p, i64 %v) {
535; X64-LABEL: sub_64r:
536; X64:       # %bb.0:
537; X64-NEXT:    subq %rsi, (%rdi)
538; X64-NEXT:    retq
539;
540; X32-LABEL: sub_64r:
541; X32:       # %bb.0:
542; X32-NEXT:    pushl %ebp
543; X32-NEXT:    .cfi_def_cfa_offset 8
544; X32-NEXT:    .cfi_offset %ebp, -8
545; X32-NEXT:    movl %esp, %ebp
546; X32-NEXT:    .cfi_def_cfa_register %ebp
547; X32-NEXT:    andl $-8, %esp
548; X32-NEXT:    subl $16, %esp
549; X32-NEXT:    movl 8(%ebp), %eax
550; X32-NEXT:    fildll (%eax)
551; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
552; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
553; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
554; X32-NEXT:    subl 12(%ebp), %ecx
555; X32-NEXT:    sbbl 16(%ebp), %edx
556; X32-NEXT:    movl %ecx, (%esp)
557; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
558; X32-NEXT:    fildll (%esp)
559; X32-NEXT:    fistpll (%eax)
560; X32-NEXT:    movl %ebp, %esp
561; X32-NEXT:    popl %ebp
562; X32-NEXT:    .cfi_def_cfa %esp, 4
563; X32-NEXT:    retl
564;   We do not check X86-32 as it cannot do 'subq'.
565  %1 = load atomic i64, i64* %p acquire, align 8
566  %2 = sub i64 %1, %v
567  store atomic i64 %2, i64* %p release, align 8
568  ret void
569}
570
571define void @sub_32r_seq_cst(i32* %p, i32 %v) {
572; X64-LABEL: sub_32r_seq_cst:
573; X64:       # %bb.0:
574; X64-NEXT:    movl (%rdi), %eax
575; X64-NEXT:    subl %esi, %eax
576; X64-NEXT:    xchgl %eax, (%rdi)
577; X64-NEXT:    retq
578;
579; X32-LABEL: sub_32r_seq_cst:
580; X32:       # %bb.0:
581; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
582; X32-NEXT:    movl (%eax), %ecx
583; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
584; X32-NEXT:    xchgl %ecx, (%eax)
585; X32-NEXT:    retl
586  %1 = load atomic i32, i32* %p monotonic, align 4
587  %2 = sub i32 %1, %v
588  store atomic i32 %2, i32* %p seq_cst, align 4
589  ret void
590}
591
592; ----- AND -----
593
594define void @and_8i(i8* %p) {
595; X64-LABEL: and_8i:
596; X64:       # %bb.0:
597; X64-NEXT:    andb $2, (%rdi)
598; X64-NEXT:    retq
599;
600; X32-LABEL: and_8i:
601; X32:       # %bb.0:
602; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
603; X32-NEXT:    andb $2, (%eax)
604; X32-NEXT:    retl
605  %1 = load atomic i8, i8* %p monotonic, align 1
606  %2 = and i8 %1, 2
607  store atomic i8 %2, i8* %p release, align 1
608  ret void
609}
610
611define void @and_8r(i8* %p, i8 %v) {
612; X64-LABEL: and_8r:
613; X64:       # %bb.0:
614; X64-NEXT:    andb %sil, (%rdi)
615; X64-NEXT:    retq
616;
617; X32-LABEL: and_8r:
618; X32:       # %bb.0:
619; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
620; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
621; X32-NEXT:    andb %al, (%ecx)
622; X32-NEXT:    retl
623  %1 = load atomic i8, i8* %p monotonic, align 1
624  %2 = and i8 %1, %v
625  store atomic i8 %2, i8* %p release, align 1
626  ret void
627}
628
629define void @and_16i(i16* %p) {
630; X64-LABEL: and_16i:
631; X64:       # %bb.0:
632; X64-NEXT:    andw $2, (%rdi)
633; X64-NEXT:    retq
634;
635; X32-LABEL: and_16i:
636; X32:       # %bb.0:
637; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
638; X32-NEXT:    andw $2, (%eax)
639; X32-NEXT:    retl
640  %1 = load atomic i16, i16* %p acquire, align 2
641  %2 = and i16 %1, 2
642  store atomic i16 %2, i16* %p release, align 2
643  ret void
644}
645
646define void @and_16r(i16* %p, i16 %v) {
647; X64-LABEL: and_16r:
648; X64:       # %bb.0:
649; X64-NEXT:    andw %si, (%rdi)
650; X64-NEXT:    retq
651;
652; X32-LABEL: and_16r:
653; X32:       # %bb.0:
654; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
655; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
656; X32-NEXT:    andw %ax, (%ecx)
657; X32-NEXT:    retl
658  %1 = load atomic i16, i16* %p acquire, align 2
659  %2 = and i16 %1, %v
660  store atomic i16 %2, i16* %p release, align 2
661  ret void
662}
663
664define void @and_32i(i32* %p) {
665; X64-LABEL: and_32i:
666; X64:       # %bb.0:
667; X64-NEXT:    andl $2, (%rdi)
668; X64-NEXT:    retq
669;
670; X32-LABEL: and_32i:
671; X32:       # %bb.0:
672; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
673; X32-NEXT:    andl $2, (%eax)
674; X32-NEXT:    retl
675  %1 = load atomic i32, i32* %p acquire, align 4
676  %2 = and i32 %1, 2
677  store atomic i32 %2, i32* %p release, align 4
678  ret void
679}
680
681define void @and_32r(i32* %p, i32 %v) {
682; X64-LABEL: and_32r:
683; X64:       # %bb.0:
684; X64-NEXT:    andl %esi, (%rdi)
685; X64-NEXT:    retq
686;
687; X32-LABEL: and_32r:
688; X32:       # %bb.0:
689; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
690; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
691; X32-NEXT:    andl %eax, (%ecx)
692; X32-NEXT:    retl
693  %1 = load atomic i32, i32* %p acquire, align 4
694  %2 = and i32 %1, %v
695  store atomic i32 %2, i32* %p release, align 4
696  ret void
697}
698
699define void @and_64i(i64* %p) {
700; X64-LABEL: and_64i:
701; X64:       # %bb.0:
702; X64-NEXT:    andq $2, (%rdi)
703; X64-NEXT:    retq
704;
705; X32-LABEL: and_64i:
706; X32:       # %bb.0:
707; X32-NEXT:    pushl %ebp
708; X32-NEXT:    .cfi_def_cfa_offset 8
709; X32-NEXT:    .cfi_offset %ebp, -8
710; X32-NEXT:    movl %esp, %ebp
711; X32-NEXT:    .cfi_def_cfa_register %ebp
712; X32-NEXT:    andl $-8, %esp
713; X32-NEXT:    subl $16, %esp
714; X32-NEXT:    movl 8(%ebp), %eax
715; X32-NEXT:    fildll (%eax)
716; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
717; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
718; X32-NEXT:    andl $2, %ecx
719; X32-NEXT:    movl %ecx, (%esp)
720; X32-NEXT:    movl $0, {{[0-9]+}}(%esp)
721; X32-NEXT:    fildll (%esp)
722; X32-NEXT:    fistpll (%eax)
723; X32-NEXT:    movl %ebp, %esp
724; X32-NEXT:    popl %ebp
725; X32-NEXT:    .cfi_def_cfa %esp, 4
726; X32-NEXT:    retl
727;   We do not check X86-32 as it cannot do 'andq'.
728  %1 = load atomic i64, i64* %p acquire, align 8
729  %2 = and i64 %1, 2
730  store atomic i64 %2, i64* %p release, align 8
731  ret void
732}
733
734define void @and_64r(i64* %p, i64 %v) {
735; X64-LABEL: and_64r:
736; X64:       # %bb.0:
737; X64-NEXT:    andq %rsi, (%rdi)
738; X64-NEXT:    retq
739;
740; X32-LABEL: and_64r:
741; X32:       # %bb.0:
742; X32-NEXT:    pushl %ebp
743; X32-NEXT:    .cfi_def_cfa_offset 8
744; X32-NEXT:    .cfi_offset %ebp, -8
745; X32-NEXT:    movl %esp, %ebp
746; X32-NEXT:    .cfi_def_cfa_register %ebp
747; X32-NEXT:    andl $-8, %esp
748; X32-NEXT:    subl $16, %esp
749; X32-NEXT:    movl 8(%ebp), %eax
750; X32-NEXT:    fildll (%eax)
751; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
752; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
753; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
754; X32-NEXT:    andl 16(%ebp), %edx
755; X32-NEXT:    andl 12(%ebp), %ecx
756; X32-NEXT:    movl %ecx, (%esp)
757; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
758; X32-NEXT:    fildll (%esp)
759; X32-NEXT:    fistpll (%eax)
760; X32-NEXT:    movl %ebp, %esp
761; X32-NEXT:    popl %ebp
762; X32-NEXT:    .cfi_def_cfa %esp, 4
763; X32-NEXT:    retl
764;   We do not check X86-32 as it cannot do 'andq'.
765  %1 = load atomic i64, i64* %p acquire, align 8
766  %2 = and i64 %1, %v
767  store atomic i64 %2, i64* %p release, align 8
768  ret void
769}
770
771define void @and_32i_seq_cst(i32* %p) {
772; X64-LABEL: and_32i_seq_cst:
773; X64:       # %bb.0:
774; X64-NEXT:    movl (%rdi), %eax
775; X64-NEXT:    andl $2, %eax
776; X64-NEXT:    xchgl %eax, (%rdi)
777; X64-NEXT:    retq
778;
779; X32-LABEL: and_32i_seq_cst:
780; X32:       # %bb.0:
781; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
782; X32-NEXT:    movl (%eax), %ecx
783; X32-NEXT:    andl $2, %ecx
784; X32-NEXT:    xchgl %ecx, (%eax)
785; X32-NEXT:    retl
786  %1 = load atomic i32, i32* %p monotonic, align 4
787  %2 = and i32 %1, 2
788  store atomic i32 %2, i32* %p seq_cst, align 4
789  ret void
790}
791
792define void @and_32r_seq_cst(i32* %p, i32 %v) {
793; X64-LABEL: and_32r_seq_cst:
794; X64:       # %bb.0:
795; X64-NEXT:    movl (%rdi), %eax
796; X64-NEXT:    andl %esi, %eax
797; X64-NEXT:    xchgl %eax, (%rdi)
798; X64-NEXT:    retq
799;
800; X32-LABEL: and_32r_seq_cst:
801; X32:       # %bb.0:
802; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
803; X32-NEXT:    movl (%eax), %ecx
804; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
805; X32-NEXT:    xchgl %ecx, (%eax)
806; X32-NEXT:    retl
807  %1 = load atomic i32, i32* %p monotonic, align 4
808  %2 = and i32 %1, %v
809  store atomic i32 %2, i32* %p seq_cst, align 4
810  ret void
811}
812
813; ----- OR -----
814
815define void @or_8i(i8* %p) {
816; X64-LABEL: or_8i:
817; X64:       # %bb.0:
818; X64-NEXT:    orb $2, (%rdi)
819; X64-NEXT:    retq
820;
821; X32-LABEL: or_8i:
822; X32:       # %bb.0:
823; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
824; X32-NEXT:    orb $2, (%eax)
825; X32-NEXT:    retl
826  %1 = load atomic i8, i8* %p acquire, align 1
827  %2 = or i8 %1, 2
828  store atomic i8 %2, i8* %p release, align 1
829  ret void
830}
831
832define void @or_8r(i8* %p, i8 %v) {
833; X64-LABEL: or_8r:
834; X64:       # %bb.0:
835; X64-NEXT:    orb %sil, (%rdi)
836; X64-NEXT:    retq
837;
838; X32-LABEL: or_8r:
839; X32:       # %bb.0:
840; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
841; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
842; X32-NEXT:    orb %al, (%ecx)
843; X32-NEXT:    retl
844  %1 = load atomic i8, i8* %p acquire, align 1
845  %2 = or i8 %1, %v
846  store atomic i8 %2, i8* %p release, align 1
847  ret void
848}
849
850define void @or_16i(i16* %p) {
851; X64-LABEL: or_16i:
852; X64:       # %bb.0:
853; X64-NEXT:    orw $2, (%rdi)
854; X64-NEXT:    retq
855;
856; X32-LABEL: or_16i:
857; X32:       # %bb.0:
858; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
859; X32-NEXT:    orw $2, (%eax)
860; X32-NEXT:    retl
861  %1 = load atomic i16, i16* %p acquire, align 2
862  %2 = or i16 %1, 2
863  store atomic i16 %2, i16* %p release, align 2
864  ret void
865}
866
867define void @or_16r(i16* %p, i16 %v) {
868; X64-LABEL: or_16r:
869; X64:       # %bb.0:
870; X64-NEXT:    orw %si, (%rdi)
871; X64-NEXT:    retq
872;
873; X32-LABEL: or_16r:
874; X32:       # %bb.0:
875; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
876; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
877; X32-NEXT:    orw %ax, (%ecx)
878; X32-NEXT:    retl
879  %1 = load atomic i16, i16* %p acquire, align 2
880  %2 = or i16 %1, %v
881  store atomic i16 %2, i16* %p release, align 2
882  ret void
883}
884
885define void @or_32i(i32* %p) {
886; X64-LABEL: or_32i:
887; X64:       # %bb.0:
888; X64-NEXT:    orl $2, (%rdi)
889; X64-NEXT:    retq
890;
891; X32-LABEL: or_32i:
892; X32:       # %bb.0:
893; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
894; X32-NEXT:    orl $2, (%eax)
895; X32-NEXT:    retl
896  %1 = load atomic i32, i32* %p acquire, align 4
897  %2 = or i32 %1, 2
898  store atomic i32 %2, i32* %p release, align 4
899  ret void
900}
901
902define void @or_32r(i32* %p, i32 %v) {
903; X64-LABEL: or_32r:
904; X64:       # %bb.0:
905; X64-NEXT:    orl %esi, (%rdi)
906; X64-NEXT:    retq
907;
908; X32-LABEL: or_32r:
909; X32:       # %bb.0:
910; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
911; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
912; X32-NEXT:    orl %eax, (%ecx)
913; X32-NEXT:    retl
914  %1 = load atomic i32, i32* %p acquire, align 4
915  %2 = or i32 %1, %v
916  store atomic i32 %2, i32* %p release, align 4
917  ret void
918}
919
920define void @or_64i(i64* %p) {
921; X64-LABEL: or_64i:
922; X64:       # %bb.0:
923; X64-NEXT:    orq $2, (%rdi)
924; X64-NEXT:    retq
925;
926; X32-LABEL: or_64i:
927; X32:       # %bb.0:
928; X32-NEXT:    pushl %ebp
929; X32-NEXT:    .cfi_def_cfa_offset 8
930; X32-NEXT:    .cfi_offset %ebp, -8
931; X32-NEXT:    movl %esp, %ebp
932; X32-NEXT:    .cfi_def_cfa_register %ebp
933; X32-NEXT:    andl $-8, %esp
934; X32-NEXT:    subl $16, %esp
935; X32-NEXT:    movl 8(%ebp), %eax
936; X32-NEXT:    fildll (%eax)
937; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
938; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
939; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
940; X32-NEXT:    orl $2, %ecx
941; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
942; X32-NEXT:    movl %ecx, (%esp)
943; X32-NEXT:    fildll (%esp)
944; X32-NEXT:    fistpll (%eax)
945; X32-NEXT:    movl %ebp, %esp
946; X32-NEXT:    popl %ebp
947; X32-NEXT:    .cfi_def_cfa %esp, 4
948; X32-NEXT:    retl
949;   We do not check X86-32 as it cannot do 'orq'.
950  %1 = load atomic i64, i64* %p acquire, align 8
951  %2 = or i64 %1, 2
952  store atomic i64 %2, i64* %p release, align 8
953  ret void
954}
955
956define void @or_64r(i64* %p, i64 %v) {
957; X64-LABEL: or_64r:
958; X64:       # %bb.0:
959; X64-NEXT:    orq %rsi, (%rdi)
960; X64-NEXT:    retq
961;
962; X32-LABEL: or_64r:
963; X32:       # %bb.0:
964; X32-NEXT:    pushl %ebp
965; X32-NEXT:    .cfi_def_cfa_offset 8
966; X32-NEXT:    .cfi_offset %ebp, -8
967; X32-NEXT:    movl %esp, %ebp
968; X32-NEXT:    .cfi_def_cfa_register %ebp
969; X32-NEXT:    andl $-8, %esp
970; X32-NEXT:    subl $16, %esp
971; X32-NEXT:    movl 8(%ebp), %eax
972; X32-NEXT:    fildll (%eax)
973; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
974; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
975; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
976; X32-NEXT:    orl 16(%ebp), %edx
977; X32-NEXT:    orl 12(%ebp), %ecx
978; X32-NEXT:    movl %ecx, (%esp)
979; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
980; X32-NEXT:    fildll (%esp)
981; X32-NEXT:    fistpll (%eax)
982; X32-NEXT:    movl %ebp, %esp
983; X32-NEXT:    popl %ebp
984; X32-NEXT:    .cfi_def_cfa %esp, 4
985; X32-NEXT:    retl
986;   We do not check X86-32 as it cannot do 'orq'.
987  %1 = load atomic i64, i64* %p acquire, align 8
988  %2 = or i64 %1, %v
989  store atomic i64 %2, i64* %p release, align 8
990  ret void
991}
992
993define void @or_32i_seq_cst(i32* %p) {
994; X64-LABEL: or_32i_seq_cst:
995; X64:       # %bb.0:
996; X64-NEXT:    movl (%rdi), %eax
997; X64-NEXT:    orl $2, %eax
998; X64-NEXT:    xchgl %eax, (%rdi)
999; X64-NEXT:    retq
1000;
1001; X32-LABEL: or_32i_seq_cst:
1002; X32:       # %bb.0:
1003; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1004; X32-NEXT:    movl (%eax), %ecx
1005; X32-NEXT:    orl $2, %ecx
1006; X32-NEXT:    xchgl %ecx, (%eax)
1007; X32-NEXT:    retl
1008  %1 = load atomic i32, i32* %p monotonic, align 4
1009  %2 = or i32 %1, 2
1010  store atomic i32 %2, i32* %p seq_cst, align 4
1011  ret void
1012}
1013
1014define void @or_32r_seq_cst(i32* %p, i32 %v) {
1015; X64-LABEL: or_32r_seq_cst:
1016; X64:       # %bb.0:
1017; X64-NEXT:    movl (%rdi), %eax
1018; X64-NEXT:    orl %esi, %eax
1019; X64-NEXT:    xchgl %eax, (%rdi)
1020; X64-NEXT:    retq
1021;
1022; X32-LABEL: or_32r_seq_cst:
1023; X32:       # %bb.0:
1024; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1025; X32-NEXT:    movl (%eax), %ecx
1026; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
1027; X32-NEXT:    xchgl %ecx, (%eax)
1028; X32-NEXT:    retl
1029  %1 = load atomic i32, i32* %p monotonic, align 4
1030  %2 = or i32 %1, %v
1031  store atomic i32 %2, i32* %p seq_cst, align 4
1032  ret void
1033}
1034
1035; ----- XOR -----
1036
1037define void @xor_8i(i8* %p) {
1038; X64-LABEL: xor_8i:
1039; X64:       # %bb.0:
1040; X64-NEXT:    xorb $2, (%rdi)
1041; X64-NEXT:    retq
1042;
1043; X32-LABEL: xor_8i:
1044; X32:       # %bb.0:
1045; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1046; X32-NEXT:    xorb $2, (%eax)
1047; X32-NEXT:    retl
1048  %1 = load atomic i8, i8* %p acquire, align 1
1049  %2 = xor i8 %1, 2
1050  store atomic i8 %2, i8* %p release, align 1
1051  ret void
1052}
1053
1054define void @xor_8r(i8* %p, i8 %v) {
1055; X64-LABEL: xor_8r:
1056; X64:       # %bb.0:
1057; X64-NEXT:    xorb %sil, (%rdi)
1058; X64-NEXT:    retq
1059;
1060; X32-LABEL: xor_8r:
1061; X32:       # %bb.0:
1062; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1063; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1064; X32-NEXT:    xorb %al, (%ecx)
1065; X32-NEXT:    retl
1066  %1 = load atomic i8, i8* %p acquire, align 1
1067  %2 = xor i8 %1, %v
1068  store atomic i8 %2, i8* %p release, align 1
1069  ret void
1070}
1071
1072define void @xor_16i(i16* %p) {
1073; X64-LABEL: xor_16i:
1074; X64:       # %bb.0:
1075; X64-NEXT:    xorw $2, (%rdi)
1076; X64-NEXT:    retq
1077;
1078; X32-LABEL: xor_16i:
1079; X32:       # %bb.0:
1080; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1081; X32-NEXT:    xorw $2, (%eax)
1082; X32-NEXT:    retl
1083  %1 = load atomic i16, i16* %p acquire, align 2
1084  %2 = xor i16 %1, 2
1085  store atomic i16 %2, i16* %p release, align 2
1086  ret void
1087}
1088
1089define void @xor_16r(i16* %p, i16 %v) {
1090; X64-LABEL: xor_16r:
1091; X64:       # %bb.0:
1092; X64-NEXT:    xorw %si, (%rdi)
1093; X64-NEXT:    retq
1094;
1095; X32-LABEL: xor_16r:
1096; X32:       # %bb.0:
1097; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1098; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1099; X32-NEXT:    xorw %ax, (%ecx)
1100; X32-NEXT:    retl
1101  %1 = load atomic i16, i16* %p acquire, align 2
1102  %2 = xor i16 %1, %v
1103  store atomic i16 %2, i16* %p release, align 2
1104  ret void
1105}
1106
1107define void @xor_32i(i32* %p) {
1108; X64-LABEL: xor_32i:
1109; X64:       # %bb.0:
1110; X64-NEXT:    xorl $2, (%rdi)
1111; X64-NEXT:    retq
1112;
1113; X32-LABEL: xor_32i:
1114; X32:       # %bb.0:
1115; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1116; X32-NEXT:    xorl $2, (%eax)
1117; X32-NEXT:    retl
1118  %1 = load atomic i32, i32* %p acquire, align 4
1119  %2 = xor i32 %1, 2
1120  store atomic i32 %2, i32* %p release, align 4
1121  ret void
1122}
1123
1124define void @xor_32r(i32* %p, i32 %v) {
1125; X64-LABEL: xor_32r:
1126; X64:       # %bb.0:
1127; X64-NEXT:    xorl %esi, (%rdi)
1128; X64-NEXT:    retq
1129;
1130; X32-LABEL: xor_32r:
1131; X32:       # %bb.0:
1132; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1133; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1134; X32-NEXT:    xorl %eax, (%ecx)
1135; X32-NEXT:    retl
1136  %1 = load atomic i32, i32* %p acquire, align 4
1137  %2 = xor i32 %1, %v
1138  store atomic i32 %2, i32* %p release, align 4
1139  ret void
1140}
1141
1142define void @xor_64i(i64* %p) {
1143; X64-LABEL: xor_64i:
1144; X64:       # %bb.0:
1145; X64-NEXT:    xorq $2, (%rdi)
1146; X64-NEXT:    retq
1147;
1148; X32-LABEL: xor_64i:
1149; X32:       # %bb.0:
1150; X32-NEXT:    pushl %ebp
1151; X32-NEXT:    .cfi_def_cfa_offset 8
1152; X32-NEXT:    .cfi_offset %ebp, -8
1153; X32-NEXT:    movl %esp, %ebp
1154; X32-NEXT:    .cfi_def_cfa_register %ebp
1155; X32-NEXT:    andl $-8, %esp
1156; X32-NEXT:    subl $16, %esp
1157; X32-NEXT:    movl 8(%ebp), %eax
1158; X32-NEXT:    fildll (%eax)
1159; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
1160; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1161; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1162; X32-NEXT:    xorl $2, %ecx
1163; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1164; X32-NEXT:    movl %ecx, (%esp)
1165; X32-NEXT:    fildll (%esp)
1166; X32-NEXT:    fistpll (%eax)
1167; X32-NEXT:    movl %ebp, %esp
1168; X32-NEXT:    popl %ebp
1169; X32-NEXT:    .cfi_def_cfa %esp, 4
1170; X32-NEXT:    retl
1171;   We do not check X86-32 as it cannot do 'xorq'.
1172  %1 = load atomic i64, i64* %p acquire, align 8
1173  %2 = xor i64 %1, 2
1174  store atomic i64 %2, i64* %p release, align 8
1175  ret void
1176}
1177
1178define void @xor_64r(i64* %p, i64 %v) {
1179; X64-LABEL: xor_64r:
1180; X64:       # %bb.0:
1181; X64-NEXT:    xorq %rsi, (%rdi)
1182; X64-NEXT:    retq
1183;
1184; X32-LABEL: xor_64r:
1185; X32:       # %bb.0:
1186; X32-NEXT:    pushl %ebp
1187; X32-NEXT:    .cfi_def_cfa_offset 8
1188; X32-NEXT:    .cfi_offset %ebp, -8
1189; X32-NEXT:    movl %esp, %ebp
1190; X32-NEXT:    .cfi_def_cfa_register %ebp
1191; X32-NEXT:    andl $-8, %esp
1192; X32-NEXT:    subl $16, %esp
1193; X32-NEXT:    movl 8(%ebp), %eax
1194; X32-NEXT:    fildll (%eax)
1195; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
1196; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1197; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1198; X32-NEXT:    xorl 16(%ebp), %edx
1199; X32-NEXT:    xorl 12(%ebp), %ecx
1200; X32-NEXT:    movl %ecx, (%esp)
1201; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1202; X32-NEXT:    fildll (%esp)
1203; X32-NEXT:    fistpll (%eax)
1204; X32-NEXT:    movl %ebp, %esp
1205; X32-NEXT:    popl %ebp
1206; X32-NEXT:    .cfi_def_cfa %esp, 4
1207; X32-NEXT:    retl
1208;   We do not check X86-32 as it cannot do 'xorq'.
1209  %1 = load atomic i64, i64* %p acquire, align 8
1210  %2 = xor i64 %1, %v
1211  store atomic i64 %2, i64* %p release, align 8
1212  ret void
1213}
1214
1215define void @xor_32i_seq_cst(i32* %p) {
1216; X64-LABEL: xor_32i_seq_cst:
1217; X64:       # %bb.0:
1218; X64-NEXT:    movl (%rdi), %eax
1219; X64-NEXT:    xorl $2, %eax
1220; X64-NEXT:    xchgl %eax, (%rdi)
1221; X64-NEXT:    retq
1222;
1223; X32-LABEL: xor_32i_seq_cst:
1224; X32:       # %bb.0:
1225; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1226; X32-NEXT:    movl (%eax), %ecx
1227; X32-NEXT:    xorl $2, %ecx
1228; X32-NEXT:    xchgl %ecx, (%eax)
1229; X32-NEXT:    retl
1230  %1 = load atomic i32, i32* %p monotonic, align 4
1231  %2 = xor i32 %1, 2
1232  store atomic i32 %2, i32* %p seq_cst, align 4
1233  ret void
1234}
1235
1236define void @xor_32r_seq_cst(i32* %p, i32 %v) {
1237; X64-LABEL: xor_32r_seq_cst:
1238; X64:       # %bb.0:
1239; X64-NEXT:    movl (%rdi), %eax
1240; X64-NEXT:    xorl %esi, %eax
1241; X64-NEXT:    xchgl %eax, (%rdi)
1242; X64-NEXT:    retq
1243;
1244; X32-LABEL: xor_32r_seq_cst:
1245; X32:       # %bb.0:
1246; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1247; X32-NEXT:    movl (%eax), %ecx
1248; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
1249; X32-NEXT:    xchgl %ecx, (%eax)
1250; X32-NEXT:    retl
1251  %1 = load atomic i32, i32* %p monotonic, align 4
1252  %2 = xor i32 %1, %v
1253  store atomic i32 %2, i32* %p seq_cst, align 4
1254  ret void
1255}
1256
1257; ----- INC -----
1258
1259define void @inc_8(i8* %p) {
1260; FAST_INC-LABEL: inc_8:
1261; FAST_INC:       # %bb.0:
1262; FAST_INC-NEXT:    incb (%rdi)
1263; FAST_INC-NEXT:    retq
1264;
1265; X32-LABEL: inc_8:
1266; X32:       # %bb.0:
1267; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1268; X32-NEXT:    incb (%eax)
1269; X32-NEXT:    retl
1270;
1271; SLOW_INC-LABEL: inc_8:
1272; SLOW_INC:       # %bb.0:
1273; SLOW_INC-NEXT:    addb $1, (%rdi)
1274; SLOW_INC-NEXT:    retq
1275  %1 = load atomic i8, i8* %p seq_cst, align 1
1276  %2 = add i8 %1, 1
1277  store atomic i8 %2, i8* %p release, align 1
1278  ret void
1279}
1280
1281define void @inc_16(i16* %p) {
1282; FAST_INC-LABEL: inc_16:
1283; FAST_INC:       # %bb.0:
1284; FAST_INC-NEXT:    incw (%rdi)
1285; FAST_INC-NEXT:    retq
1286;
1287; X32-LABEL: inc_16:
1288; X32:       # %bb.0:
1289; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1290; X32-NEXT:    incw (%eax)
1291; X32-NEXT:    retl
1292;
1293; SLOW_INC-LABEL: inc_16:
1294; SLOW_INC:       # %bb.0:
1295; SLOW_INC-NEXT:    addw $1, (%rdi)
1296; SLOW_INC-NEXT:    retq
1297  %1 = load atomic i16, i16* %p acquire, align 2
1298  %2 = add i16 %1, 1
1299  store atomic i16 %2, i16* %p release, align 2
1300  ret void
1301}
1302
1303define void @inc_32(i32* %p) {
1304; FAST_INC-LABEL: inc_32:
1305; FAST_INC:       # %bb.0:
1306; FAST_INC-NEXT:    incl (%rdi)
1307; FAST_INC-NEXT:    retq
1308;
1309; X32-LABEL: inc_32:
1310; X32:       # %bb.0:
1311; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1312; X32-NEXT:    incl (%eax)
1313; X32-NEXT:    retl
1314;
1315; SLOW_INC-LABEL: inc_32:
1316; SLOW_INC:       # %bb.0:
1317; SLOW_INC-NEXT:    addl $1, (%rdi)
1318; SLOW_INC-NEXT:    retq
1319  %1 = load atomic i32, i32* %p acquire, align 4
1320  %2 = add i32 %1, 1
1321  store atomic i32 %2, i32* %p monotonic, align 4
1322  ret void
1323}
1324
1325define void @inc_64(i64* %p) {
1326; FAST_INC-LABEL: inc_64:
1327; FAST_INC:       # %bb.0:
1328; FAST_INC-NEXT:    incq (%rdi)
1329; FAST_INC-NEXT:    retq
1330;
1331; X32-LABEL: inc_64:
1332; X32:       # %bb.0:
1333; X32-NEXT:    pushl %ebp
1334; X32-NEXT:    .cfi_def_cfa_offset 8
1335; X32-NEXT:    .cfi_offset %ebp, -8
1336; X32-NEXT:    movl %esp, %ebp
1337; X32-NEXT:    .cfi_def_cfa_register %ebp
1338; X32-NEXT:    andl $-8, %esp
1339; X32-NEXT:    subl $16, %esp
1340; X32-NEXT:    movl 8(%ebp), %eax
1341; X32-NEXT:    fildll (%eax)
1342; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
1343; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1344; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1345; X32-NEXT:    addl $1, %ecx
1346; X32-NEXT:    adcl $0, %edx
1347; X32-NEXT:    movl %ecx, (%esp)
1348; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1349; X32-NEXT:    fildll (%esp)
1350; X32-NEXT:    fistpll (%eax)
1351; X32-NEXT:    movl %ebp, %esp
1352; X32-NEXT:    popl %ebp
1353; X32-NEXT:    .cfi_def_cfa %esp, 4
1354; X32-NEXT:    retl
1355;
1356; SLOW_INC-LABEL: inc_64:
1357; SLOW_INC:       # %bb.0:
1358; SLOW_INC-NEXT:    addq $1, (%rdi)
1359; SLOW_INC-NEXT:    retq
1360;   We do not check X86-32 as it cannot do 'incq'.
1361  %1 = load atomic i64, i64* %p acquire, align 8
1362  %2 = add i64 %1, 1
1363  store atomic i64 %2, i64* %p release, align 8
1364  ret void
1365}
1366
1367define void @inc_32_seq_cst(i32* %p) {
1368; FAST_INC-LABEL: inc_32_seq_cst:
1369; FAST_INC:       # %bb.0:
1370; FAST_INC-NEXT:    movl (%rdi), %eax
1371; FAST_INC-NEXT:    incl %eax
1372; FAST_INC-NEXT:    xchgl %eax, (%rdi)
1373; FAST_INC-NEXT:    retq
1374;
1375; X32-LABEL: inc_32_seq_cst:
1376; X32:       # %bb.0:
1377; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1378; X32-NEXT:    movl (%eax), %ecx
1379; X32-NEXT:    incl %ecx
1380; X32-NEXT:    xchgl %ecx, (%eax)
1381; X32-NEXT:    retl
1382;
1383; SLOW_INC-LABEL: inc_32_seq_cst:
1384; SLOW_INC:       # %bb.0:
1385; SLOW_INC-NEXT:    movl (%rdi), %eax
1386; SLOW_INC-NEXT:    addl $1, %eax
1387; SLOW_INC-NEXT:    xchgl %eax, (%rdi)
1388; SLOW_INC-NEXT:    retq
1389  %1 = load atomic i32, i32* %p monotonic, align 4
1390  %2 = add i32 %1, 1
1391  store atomic i32 %2, i32* %p seq_cst, align 4
1392  ret void
1393}
1394
1395; ----- DEC -----
1396
1397define void @dec_8(i8* %p) {
1398; FAST_INC-LABEL: dec_8:
1399; FAST_INC:       # %bb.0:
1400; FAST_INC-NEXT:    decb (%rdi)
1401; FAST_INC-NEXT:    retq
1402;
1403; X32-LABEL: dec_8:
1404; X32:       # %bb.0:
1405; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1406; X32-NEXT:    decb (%eax)
1407; X32-NEXT:    retl
1408;
1409; SLOW_INC-LABEL: dec_8:
1410; SLOW_INC:       # %bb.0:
1411; SLOW_INC-NEXT:    addb $-1, (%rdi)
1412; SLOW_INC-NEXT:    retq
1413  %1 = load atomic i8, i8* %p seq_cst, align 1
1414  %2 = sub i8 %1, 1
1415  store atomic i8 %2, i8* %p release, align 1
1416  ret void
1417}
1418
1419define void @dec_16(i16* %p) {
1420; FAST_INC-LABEL: dec_16:
1421; FAST_INC:       # %bb.0:
1422; FAST_INC-NEXT:    decw (%rdi)
1423; FAST_INC-NEXT:    retq
1424;
1425; X32-LABEL: dec_16:
1426; X32:       # %bb.0:
1427; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1428; X32-NEXT:    decw (%eax)
1429; X32-NEXT:    retl
1430;
1431; SLOW_INC-LABEL: dec_16:
1432; SLOW_INC:       # %bb.0:
1433; SLOW_INC-NEXT:    addw $-1, (%rdi)
1434; SLOW_INC-NEXT:    retq
1435  %1 = load atomic i16, i16* %p acquire, align 2
1436  %2 = sub i16 %1, 1
1437  store atomic i16 %2, i16* %p release, align 2
1438  ret void
1439}
1440
1441define void @dec_32(i32* %p) {
1442; FAST_INC-LABEL: dec_32:
1443; FAST_INC:       # %bb.0:
1444; FAST_INC-NEXT:    decl (%rdi)
1445; FAST_INC-NEXT:    retq
1446;
1447; X32-LABEL: dec_32:
1448; X32:       # %bb.0:
1449; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1450; X32-NEXT:    decl (%eax)
1451; X32-NEXT:    retl
1452;
1453; SLOW_INC-LABEL: dec_32:
1454; SLOW_INC:       # %bb.0:
1455; SLOW_INC-NEXT:    addl $-1, (%rdi)
1456; SLOW_INC-NEXT:    retq
1457  %1 = load atomic i32, i32* %p acquire, align 4
1458  %2 = sub i32 %1, 1
1459  store atomic i32 %2, i32* %p monotonic, align 4
1460  ret void
1461}
1462
1463define void @dec_64(i64* %p) {
1464; FAST_INC-LABEL: dec_64:
1465; FAST_INC:       # %bb.0:
1466; FAST_INC-NEXT:    decq (%rdi)
1467; FAST_INC-NEXT:    retq
1468;
1469; X32-LABEL: dec_64:
1470; X32:       # %bb.0:
1471; X32-NEXT:    pushl %ebp
1472; X32-NEXT:    .cfi_def_cfa_offset 8
1473; X32-NEXT:    .cfi_offset %ebp, -8
1474; X32-NEXT:    movl %esp, %ebp
1475; X32-NEXT:    .cfi_def_cfa_register %ebp
1476; X32-NEXT:    andl $-8, %esp
1477; X32-NEXT:    subl $16, %esp
1478; X32-NEXT:    movl 8(%ebp), %eax
1479; X32-NEXT:    fildll (%eax)
1480; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
1481; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1482; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1483; X32-NEXT:    addl $-1, %ecx
1484; X32-NEXT:    adcl $-1, %edx
1485; X32-NEXT:    movl %ecx, (%esp)
1486; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1487; X32-NEXT:    fildll (%esp)
1488; X32-NEXT:    fistpll (%eax)
1489; X32-NEXT:    movl %ebp, %esp
1490; X32-NEXT:    popl %ebp
1491; X32-NEXT:    .cfi_def_cfa %esp, 4
1492; X32-NEXT:    retl
1493;
1494; SLOW_INC-LABEL: dec_64:
1495; SLOW_INC:       # %bb.0:
1496; SLOW_INC-NEXT:    addq $-1, (%rdi)
1497; SLOW_INC-NEXT:    retq
1498;   We do not check X86-32 as it cannot do 'decq'.
1499  %1 = load atomic i64, i64* %p acquire, align 8
1500  %2 = sub i64 %1, 1
1501  store atomic i64 %2, i64* %p release, align 8
1502  ret void
1503}
1504
1505define void @dec_32_seq_cst(i32* %p) {
1506; FAST_INC-LABEL: dec_32_seq_cst:
1507; FAST_INC:       # %bb.0:
1508; FAST_INC-NEXT:    movl (%rdi), %eax
1509; FAST_INC-NEXT:    decl %eax
1510; FAST_INC-NEXT:    xchgl %eax, (%rdi)
1511; FAST_INC-NEXT:    retq
1512;
1513; X32-LABEL: dec_32_seq_cst:
1514; X32:       # %bb.0:
1515; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1516; X32-NEXT:    movl (%eax), %ecx
1517; X32-NEXT:    decl %ecx
1518; X32-NEXT:    xchgl %ecx, (%eax)
1519; X32-NEXT:    retl
1520;
1521; SLOW_INC-LABEL: dec_32_seq_cst:
1522; SLOW_INC:       # %bb.0:
1523; SLOW_INC-NEXT:    movl (%rdi), %eax
1524; SLOW_INC-NEXT:    addl $-1, %eax
1525; SLOW_INC-NEXT:    xchgl %eax, (%rdi)
1526; SLOW_INC-NEXT:    retq
1527  %1 = load atomic i32, i32* %p monotonic, align 4
1528  %2 = sub i32 %1, 1
1529  store atomic i32 %2, i32* %p seq_cst, align 4
1530  ret void
1531}
1532
1533; ----- NOT -----
1534
1535define void @not_8(i8* %p) {
1536; X64-LABEL: not_8:
1537; X64:       # %bb.0:
1538; X64-NEXT:    notb (%rdi)
1539; X64-NEXT:    retq
1540;
1541; X32-LABEL: not_8:
1542; X32:       # %bb.0:
1543; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1544; X32-NEXT:    notb (%eax)
1545; X32-NEXT:    retl
1546  %1 = load atomic i8, i8* %p seq_cst, align 1
1547  %2 = xor i8 %1, -1
1548  store atomic i8 %2, i8* %p release, align 1
1549  ret void
1550}
1551
1552define void @not_16(i16* %p) {
1553; X64-LABEL: not_16:
1554; X64:       # %bb.0:
1555; X64-NEXT:    notw (%rdi)
1556; X64-NEXT:    retq
1557;
1558; X32-LABEL: not_16:
1559; X32:       # %bb.0:
1560; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1561; X32-NEXT:    notw (%eax)
1562; X32-NEXT:    retl
1563  %1 = load atomic i16, i16* %p acquire, align 2
1564  %2 = xor i16 %1, -1
1565  store atomic i16 %2, i16* %p release, align 2
1566  ret void
1567}
1568
1569define void @not_32(i32* %p) {
1570; X64-LABEL: not_32:
1571; X64:       # %bb.0:
1572; X64-NEXT:    notl (%rdi)
1573; X64-NEXT:    retq
1574;
1575; X32-LABEL: not_32:
1576; X32:       # %bb.0:
1577; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1578; X32-NEXT:    notl (%eax)
1579; X32-NEXT:    retl
1580  %1 = load atomic i32, i32* %p acquire, align 4
1581  %2 = xor i32 %1, -1
1582  store atomic i32 %2, i32* %p monotonic, align 4
1583  ret void
1584}
1585
1586define void @not_64(i64* %p) {
1587; X64-LABEL: not_64:
1588; X64:       # %bb.0:
1589; X64-NEXT:    notq (%rdi)
1590; X64-NEXT:    retq
1591;
1592; X32-LABEL: not_64:
1593; X32:       # %bb.0:
1594; X32-NEXT:    pushl %ebp
1595; X32-NEXT:    .cfi_def_cfa_offset 8
1596; X32-NEXT:    .cfi_offset %ebp, -8
1597; X32-NEXT:    movl %esp, %ebp
1598; X32-NEXT:    .cfi_def_cfa_register %ebp
1599; X32-NEXT:    andl $-8, %esp
1600; X32-NEXT:    subl $16, %esp
1601; X32-NEXT:    movl 8(%ebp), %eax
1602; X32-NEXT:    fildll (%eax)
1603; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
1604; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1605; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1606; X32-NEXT:    notl %edx
1607; X32-NEXT:    notl %ecx
1608; X32-NEXT:    movl %ecx, (%esp)
1609; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
1610; X32-NEXT:    fildll (%esp)
1611; X32-NEXT:    fistpll (%eax)
1612; X32-NEXT:    movl %ebp, %esp
1613; X32-NEXT:    popl %ebp
1614; X32-NEXT:    .cfi_def_cfa %esp, 4
1615; X32-NEXT:    retl
1616;   We do not check X86-32 as it cannot do 'notq'.
1617  %1 = load atomic i64, i64* %p acquire, align 8
1618  %2 = xor i64 %1, -1
1619  store atomic i64 %2, i64* %p release, align 8
1620  ret void
1621}
1622
1623define void @not_32_seq_cst(i32* %p) {
1624; X64-LABEL: not_32_seq_cst:
1625; X64:       # %bb.0:
1626; X64-NEXT:    movl (%rdi), %eax
1627; X64-NEXT:    notl %eax
1628; X64-NEXT:    xchgl %eax, (%rdi)
1629; X64-NEXT:    retq
1630;
1631; X32-LABEL: not_32_seq_cst:
1632; X32:       # %bb.0:
1633; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1634; X32-NEXT:    movl (%eax), %ecx
1635; X32-NEXT:    notl %ecx
1636; X32-NEXT:    xchgl %ecx, (%eax)
1637; X32-NEXT:    retl
1638  %1 = load atomic i32, i32* %p monotonic, align 4
1639  %2 = xor i32 %1, -1
1640  store atomic i32 %2, i32* %p seq_cst, align 4
1641  ret void
1642}
1643
1644; ----- NEG -----
1645
1646define void @neg_8(i8* %p) {
1647; X64-LABEL: neg_8:
1648; X64:       # %bb.0:
1649; X64-NEXT:    negb (%rdi)
1650; X64-NEXT:    retq
1651;
1652; X32-LABEL: neg_8:
1653; X32:       # %bb.0:
1654; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1655; X32-NEXT:    negb (%eax)
1656; X32-NEXT:    retl
1657  %1 = load atomic i8, i8* %p seq_cst, align 1
1658  %2 = sub i8 0, %1
1659  store atomic i8 %2, i8* %p release, align 1
1660  ret void
1661}
1662
1663define void @neg_16(i16* %p) {
1664; X64-LABEL: neg_16:
1665; X64:       # %bb.0:
1666; X64-NEXT:    movzwl (%rdi), %eax
1667; X64-NEXT:    negl %eax
1668; X64-NEXT:    movw %ax, (%rdi)
1669; X64-NEXT:    retq
1670;
1671; X32-LABEL: neg_16:
1672; X32:       # %bb.0:
1673; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1674; X32-NEXT:    movzwl (%eax), %ecx
1675; X32-NEXT:    negl %ecx
1676; X32-NEXT:    movw %cx, (%eax)
1677; X32-NEXT:    retl
1678  %1 = load atomic i16, i16* %p acquire, align 2
1679  %2 = sub i16 0, %1
1680  store atomic i16 %2, i16* %p release, align 2
1681  ret void
1682}
1683
1684define void @neg_32(i32* %p) {
1685; X64-LABEL: neg_32:
1686; X64:       # %bb.0:
1687; X64-NEXT:    negl (%rdi)
1688; X64-NEXT:    retq
1689;
1690; X32-LABEL: neg_32:
1691; X32:       # %bb.0:
1692; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1693; X32-NEXT:    negl (%eax)
1694; X32-NEXT:    retl
1695  %1 = load atomic i32, i32* %p acquire, align 4
1696  %2 = sub i32 0, %1
1697  store atomic i32 %2, i32* %p monotonic, align 4
1698  ret void
1699}
1700
1701define void @neg_64(i64* %p) {
1702; X64-LABEL: neg_64:
1703; X64:       # %bb.0:
1704; X64-NEXT:    negq (%rdi)
1705; X64-NEXT:    retq
1706;
1707; X32-LABEL: neg_64:
1708; X32:       # %bb.0:
1709; X32-NEXT:    pushl %ebp
1710; X32-NEXT:    .cfi_def_cfa_offset 8
1711; X32-NEXT:    .cfi_offset %ebp, -8
1712; X32-NEXT:    movl %esp, %ebp
1713; X32-NEXT:    .cfi_def_cfa_register %ebp
1714; X32-NEXT:    andl $-8, %esp
1715; X32-NEXT:    subl $16, %esp
1716; X32-NEXT:    movl 8(%ebp), %eax
1717; X32-NEXT:    fildll (%eax)
1718; X32-NEXT:    fistpll {{[0-9]+}}(%esp)
1719; X32-NEXT:    xorl %ecx, %ecx
1720; X32-NEXT:    xorl %edx, %edx
1721; X32-NEXT:    subl {{[0-9]+}}(%esp), %edx
1722; X32-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
1723; X32-NEXT:    movl %edx, (%esp)
1724; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
1725; X32-NEXT:    fildll (%esp)
1726; X32-NEXT:    fistpll (%eax)
1727; X32-NEXT:    movl %ebp, %esp
1728; X32-NEXT:    popl %ebp
1729; X32-NEXT:    .cfi_def_cfa %esp, 4
1730; X32-NEXT:    retl
;   We do not check X86-32 as it cannot do 'negq'.
1732  %1 = load atomic i64, i64* %p acquire, align 8
1733  %2 = sub i64 0, %1
1734  store atomic i64 %2, i64* %p release, align 8
1735  ret void
1736}
1737
1738define void @neg_32_seq_cst(i32* %p) {
1739; X64-LABEL: neg_32_seq_cst:
1740; X64:       # %bb.0:
1741; X64-NEXT:    movl (%rdi), %eax
1742; X64-NEXT:    negl %eax
1743; X64-NEXT:    xchgl %eax, (%rdi)
1744; X64-NEXT:    retq
1745;
1746; X32-LABEL: neg_32_seq_cst:
1747; X32:       # %bb.0:
1748; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1749; X32-NEXT:    movl (%eax), %ecx
1750; X32-NEXT:    negl %ecx
1751; X32-NEXT:    xchgl %ecx, (%eax)
1752; X32-NEXT:    retl
1753  %1 = load atomic i32, i32* %p monotonic, align 4
1754  %2 = sub i32 0, %1
1755  store atomic i32 %2, i32* %p seq_cst, align 4
1756  ret void
1757}
1758
1759