1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
3; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck %s -check-prefixes=CHECK32
4; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=-cx16 | FileCheck %s -check-prefixes=CHECK32
5
; 128-bit global; the fetch_and_* functions in this file store the value
; returned by their atomicrmw into it.
6@var = global i128 0
7
8; Because cmpxchg is scheduled right after isel, and because the
9; machine scheduler and copy coalescer do not touch physical
10; register live-ranges, we end up with a useless copy.
; Acquire/acquire compare-and-swap on the i128 at %p. Returns the loaded value
; (element 0 of the cmpxchg result pair); the i1 success flag is not used.
; x86-64 expectation: a single `lock cmpxchg16b (%rdi)`.
; i386 expectation: a call to __sync_val_compare_and_swap_16.
11define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
12; CHECK-LABEL: val_compare_and_swap:
13; CHECK:       ## %bb.0:
14; CHECK-NEXT:    pushq %rbx
15; CHECK-NEXT:    .cfi_def_cfa_offset 16
16; CHECK-NEXT:    .cfi_offset %rbx, -16
17; CHECK-NEXT:    movq %rcx, %rbx
18; CHECK-NEXT:    movq %rsi, %rax
19; CHECK-NEXT:    movq %r8, %rcx
20; CHECK-NEXT:    lock cmpxchg16b (%rdi)
21; CHECK-NEXT:    popq %rbx
22; CHECK-NEXT:    retq
23;
24; CHECK32-LABEL: val_compare_and_swap:
25; CHECK32:       # %bb.0:
26; CHECK32-NEXT:    pushl %edi
27; CHECK32-NEXT:    .cfi_def_cfa_offset 8
28; CHECK32-NEXT:    pushl %esi
29; CHECK32-NEXT:    .cfi_def_cfa_offset 12
30; CHECK32-NEXT:    subl $20, %esp
31; CHECK32-NEXT:    .cfi_def_cfa_offset 32
32; CHECK32-NEXT:    .cfi_offset %esi, -12
33; CHECK32-NEXT:    .cfi_offset %edi, -8
34; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
35; CHECK32-NEXT:    subl $8, %esp
36; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
37; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
38; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
39; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
40; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
41; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
42; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
43; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
44; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
45; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
46; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
47; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
48; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
49; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
50; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
51; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
52; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
53; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
54; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
55; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
56; CHECK32-NEXT:    pushl %eax
57; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
58; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
59; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
60; CHECK32-NEXT:    addl $44, %esp
61; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
62; CHECK32-NEXT:    movl (%esp), %eax
63; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
64; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
65; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
66; CHECK32-NEXT:    movl %edi, 8(%esi)
67; CHECK32-NEXT:    movl %edx, 12(%esi)
68; CHECK32-NEXT:    movl %eax, (%esi)
69; CHECK32-NEXT:    movl %ecx, 4(%esi)
70; CHECK32-NEXT:    movl %esi, %eax
71; CHECK32-NEXT:    addl $20, %esp
72; CHECK32-NEXT:    .cfi_def_cfa_offset 12
73; CHECK32-NEXT:    popl %esi
74; CHECK32-NEXT:    .cfi_def_cfa_offset 8
75; CHECK32-NEXT:    popl %edi
76; CHECK32-NEXT:    .cfi_def_cfa_offset 4
77; CHECK32-NEXT:    retl $4
78  %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
79  %val = extractvalue { i128, i1 } %pair, 0
80  ret i128 %val
81}
82
; External, 16-byte-aligned pair of i128 values; cmpxchg16b_global_with_offset
; atomically loads its second field.
83@cmpxchg16b_global = external dso_local global { i128, i128 }, align 16
84
85;; Make sure we retain the offset of the global variable.
; Atomic acquire load of field 1 of @cmpxchg16b_global; the loaded value is unused.
; x86-64 expectation: the `lock cmpxchg16b` memory operand is the global's symbol
; plus an offset, RIP-relative.
; i386 expectation: the address pushed for the libcall is $cmpxchg16b_global+16.
86define void @cmpxchg16b_global_with_offset() nounwind {
87; CHECK-LABEL: cmpxchg16b_global_with_offset:
88; CHECK:       ## %bb.0: ## %entry
89; CHECK-NEXT:    pushq %rbx
90; CHECK-NEXT:    xorl %eax, %eax
91; CHECK-NEXT:    xorl %edx, %edx
92; CHECK-NEXT:    xorl %ecx, %ecx
93; CHECK-NEXT:    xorl %ebx, %ebx
94; CHECK-NEXT:    lock cmpxchg16b _cmpxchg16b_global+{{.*}}(%rip)
95; CHECK-NEXT:    popq %rbx
96; CHECK-NEXT:    retq
97;
98; CHECK32-LABEL: cmpxchg16b_global_with_offset:
99; CHECK32:       # %bb.0: # %entry
100; CHECK32-NEXT:    subl $36, %esp
101; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
102; CHECK32-NEXT:    pushl $0
103; CHECK32-NEXT:    pushl $0
104; CHECK32-NEXT:    pushl $0
105; CHECK32-NEXT:    pushl $0
106; CHECK32-NEXT:    pushl $0
107; CHECK32-NEXT:    pushl $0
108; CHECK32-NEXT:    pushl $0
109; CHECK32-NEXT:    pushl $0
110; CHECK32-NEXT:    pushl $cmpxchg16b_global+16
111; CHECK32-NEXT:    pushl %eax
112; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
113; CHECK32-NEXT:    addl $72, %esp
114; CHECK32-NEXT:    retl
115entry:
116  %0 = load atomic i128, i128* getelementptr inbounds ({i128, i128}, {i128, i128}* @cmpxchg16b_global, i64 0, i32 1) acquire, align 16
117  ret void
118}
119
; atomicrmw nand (release ordering) of %bits into the i128 at %p; the value
; returned by the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop whose new value is built
; with andq followed by notq on each 64-bit half.
; i386 expectation: a call to __sync_fetch_and_nand_16.
120define void @fetch_and_nand(i128* %p, i128 %bits) {
121; CHECK-LABEL: fetch_and_nand:
122; CHECK:       ## %bb.0:
123; CHECK-NEXT:    pushq %rbx
124; CHECK-NEXT:    .cfi_def_cfa_offset 16
125; CHECK-NEXT:    .cfi_offset %rbx, -16
126; CHECK-NEXT:    movq %rdx, %r8
127; CHECK-NEXT:    movq (%rdi), %rax
128; CHECK-NEXT:    movq 8(%rdi), %rdx
129; CHECK-NEXT:    .p2align 4, 0x90
130; CHECK-NEXT:  LBB2_1: ## %atomicrmw.start
131; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
132; CHECK-NEXT:    movq %rdx, %rcx
133; CHECK-NEXT:    andq %r8, %rcx
134; CHECK-NEXT:    movq %rax, %rbx
135; CHECK-NEXT:    andq %rsi, %rbx
136; CHECK-NEXT:    notq %rbx
137; CHECK-NEXT:    notq %rcx
138; CHECK-NEXT:    lock cmpxchg16b (%rdi)
139; CHECK-NEXT:    jne LBB2_1
140; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
141; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
142; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
143; CHECK-NEXT:    popq %rbx
144; CHECK-NEXT:    retq
145;
146; CHECK32-LABEL: fetch_and_nand:
147; CHECK32:       # %bb.0:
148; CHECK32-NEXT:    pushl %esi
149; CHECK32-NEXT:    .cfi_def_cfa_offset 8
150; CHECK32-NEXT:    subl $24, %esp
151; CHECK32-NEXT:    .cfi_def_cfa_offset 32
152; CHECK32-NEXT:    .cfi_offset %esi, -8
153; CHECK32-NEXT:    subl $8, %esp
154; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
155; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
156; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
157; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
158; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
159; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
160; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
161; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
162; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
163; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
164; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
165; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
166; CHECK32-NEXT:    pushl %eax
167; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
168; CHECK32-NEXT:    calll __sync_fetch_and_nand_16
169; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
170; CHECK32-NEXT:    addl $28, %esp
171; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
172; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
173; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
174; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
175; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
176; CHECK32-NEXT:    movl %esi, var+8
177; CHECK32-NEXT:    movl %edx, var+12
178; CHECK32-NEXT:    movl %eax, var
179; CHECK32-NEXT:    movl %ecx, var+4
180; CHECK32-NEXT:    addl $24, %esp
181; CHECK32-NEXT:    .cfi_def_cfa_offset 8
182; CHECK32-NEXT:    popl %esi
183; CHECK32-NEXT:    .cfi_def_cfa_offset 4
184; CHECK32-NEXT:    retl
185  %val = atomicrmw nand i128* %p, i128 %bits release
186  store i128 %val, i128* @var, align 16
187  ret void
188}
189
; atomicrmw or (seq_cst) of %bits into the i128 at %p; the value returned by
; the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop whose new value is built
; with orq on each 64-bit half.
; i386 expectation: a call to __sync_fetch_and_or_16.
190define void @fetch_and_or(i128* %p, i128 %bits) {
191; CHECK-LABEL: fetch_and_or:
192; CHECK:       ## %bb.0:
193; CHECK-NEXT:    pushq %rbx
194; CHECK-NEXT:    .cfi_def_cfa_offset 16
195; CHECK-NEXT:    .cfi_offset %rbx, -16
196; CHECK-NEXT:    movq %rdx, %r8
197; CHECK-NEXT:    movq (%rdi), %rax
198; CHECK-NEXT:    movq 8(%rdi), %rdx
199; CHECK-NEXT:    .p2align 4, 0x90
200; CHECK-NEXT:  LBB3_1: ## %atomicrmw.start
201; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
202; CHECK-NEXT:    movq %rax, %rbx
203; CHECK-NEXT:    orq %rsi, %rbx
204; CHECK-NEXT:    movq %rdx, %rcx
205; CHECK-NEXT:    orq %r8, %rcx
206; CHECK-NEXT:    lock cmpxchg16b (%rdi)
207; CHECK-NEXT:    jne LBB3_1
208; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
209; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
210; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
211; CHECK-NEXT:    popq %rbx
212; CHECK-NEXT:    retq
213;
214; CHECK32-LABEL: fetch_and_or:
215; CHECK32:       # %bb.0:
216; CHECK32-NEXT:    pushl %esi
217; CHECK32-NEXT:    .cfi_def_cfa_offset 8
218; CHECK32-NEXT:    subl $24, %esp
219; CHECK32-NEXT:    .cfi_def_cfa_offset 32
220; CHECK32-NEXT:    .cfi_offset %esi, -8
221; CHECK32-NEXT:    subl $8, %esp
222; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
223; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
224; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
225; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
226; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
227; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
228; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
229; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
230; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
231; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
232; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
233; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
234; CHECK32-NEXT:    pushl %eax
235; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
236; CHECK32-NEXT:    calll __sync_fetch_and_or_16
237; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
238; CHECK32-NEXT:    addl $28, %esp
239; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
240; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
241; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
242; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
243; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
244; CHECK32-NEXT:    movl %esi, var+8
245; CHECK32-NEXT:    movl %edx, var+12
246; CHECK32-NEXT:    movl %eax, var
247; CHECK32-NEXT:    movl %ecx, var+4
248; CHECK32-NEXT:    addl $24, %esp
249; CHECK32-NEXT:    .cfi_def_cfa_offset 8
250; CHECK32-NEXT:    popl %esi
251; CHECK32-NEXT:    .cfi_def_cfa_offset 4
252; CHECK32-NEXT:    retl
253  %val = atomicrmw or i128* %p, i128 %bits seq_cst
254  store i128 %val, i128* @var, align 16
255  ret void
256}
257
; atomicrmw add (seq_cst) of %bits into the i128 at %p; the value returned by
; the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop whose new value is built
; with addq on the low half and adcq on the high half.
; i386 expectation: a call to __sync_fetch_and_add_16.
258define void @fetch_and_add(i128* %p, i128 %bits) {
259; CHECK-LABEL: fetch_and_add:
260; CHECK:       ## %bb.0:
261; CHECK-NEXT:    pushq %rbx
262; CHECK-NEXT:    .cfi_def_cfa_offset 16
263; CHECK-NEXT:    .cfi_offset %rbx, -16
264; CHECK-NEXT:    movq %rdx, %r8
265; CHECK-NEXT:    movq (%rdi), %rax
266; CHECK-NEXT:    movq 8(%rdi), %rdx
267; CHECK-NEXT:    .p2align 4, 0x90
268; CHECK-NEXT:  LBB4_1: ## %atomicrmw.start
269; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
270; CHECK-NEXT:    movq %rax, %rbx
271; CHECK-NEXT:    addq %rsi, %rbx
272; CHECK-NEXT:    movq %rdx, %rcx
273; CHECK-NEXT:    adcq %r8, %rcx
274; CHECK-NEXT:    lock cmpxchg16b (%rdi)
275; CHECK-NEXT:    jne LBB4_1
276; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
277; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
278; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
279; CHECK-NEXT:    popq %rbx
280; CHECK-NEXT:    retq
281;
282; CHECK32-LABEL: fetch_and_add:
283; CHECK32:       # %bb.0:
284; CHECK32-NEXT:    pushl %esi
285; CHECK32-NEXT:    .cfi_def_cfa_offset 8
286; CHECK32-NEXT:    subl $24, %esp
287; CHECK32-NEXT:    .cfi_def_cfa_offset 32
288; CHECK32-NEXT:    .cfi_offset %esi, -8
289; CHECK32-NEXT:    subl $8, %esp
290; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
291; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
292; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
293; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
294; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
295; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
296; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
297; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
298; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
299; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
300; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
301; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
302; CHECK32-NEXT:    pushl %eax
303; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
304; CHECK32-NEXT:    calll __sync_fetch_and_add_16
305; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
306; CHECK32-NEXT:    addl $28, %esp
307; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
308; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
309; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
310; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
311; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
312; CHECK32-NEXT:    movl %esi, var+8
313; CHECK32-NEXT:    movl %edx, var+12
314; CHECK32-NEXT:    movl %eax, var
315; CHECK32-NEXT:    movl %ecx, var+4
316; CHECK32-NEXT:    addl $24, %esp
317; CHECK32-NEXT:    .cfi_def_cfa_offset 8
318; CHECK32-NEXT:    popl %esi
319; CHECK32-NEXT:    .cfi_def_cfa_offset 4
320; CHECK32-NEXT:    retl
321  %val = atomicrmw add i128* %p, i128 %bits seq_cst
322  store i128 %val, i128* @var, align 16
323  ret void
324}
325
; atomicrmw sub (seq_cst) of %bits from the i128 at %p; the value returned by
; the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop whose new value is built
; with subq on the low half and sbbq on the high half.
; i386 expectation: a call to __sync_fetch_and_sub_16.
326define void @fetch_and_sub(i128* %p, i128 %bits) {
327; CHECK-LABEL: fetch_and_sub:
328; CHECK:       ## %bb.0:
329; CHECK-NEXT:    pushq %rbx
330; CHECK-NEXT:    .cfi_def_cfa_offset 16
331; CHECK-NEXT:    .cfi_offset %rbx, -16
332; CHECK-NEXT:    movq %rdx, %r8
333; CHECK-NEXT:    movq (%rdi), %rax
334; CHECK-NEXT:    movq 8(%rdi), %rdx
335; CHECK-NEXT:    .p2align 4, 0x90
336; CHECK-NEXT:  LBB5_1: ## %atomicrmw.start
337; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
338; CHECK-NEXT:    movq %rax, %rbx
339; CHECK-NEXT:    subq %rsi, %rbx
340; CHECK-NEXT:    movq %rdx, %rcx
341; CHECK-NEXT:    sbbq %r8, %rcx
342; CHECK-NEXT:    lock cmpxchg16b (%rdi)
343; CHECK-NEXT:    jne LBB5_1
344; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
345; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
346; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
347; CHECK-NEXT:    popq %rbx
348; CHECK-NEXT:    retq
349;
350; CHECK32-LABEL: fetch_and_sub:
351; CHECK32:       # %bb.0:
352; CHECK32-NEXT:    pushl %esi
353; CHECK32-NEXT:    .cfi_def_cfa_offset 8
354; CHECK32-NEXT:    subl $24, %esp
355; CHECK32-NEXT:    .cfi_def_cfa_offset 32
356; CHECK32-NEXT:    .cfi_offset %esi, -8
357; CHECK32-NEXT:    subl $8, %esp
358; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
359; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
360; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
361; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
362; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
363; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
364; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
365; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
366; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
367; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
368; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
369; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
370; CHECK32-NEXT:    pushl %eax
371; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
372; CHECK32-NEXT:    calll __sync_fetch_and_sub_16
373; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
374; CHECK32-NEXT:    addl $28, %esp
375; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
376; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
377; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
378; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
379; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
380; CHECK32-NEXT:    movl %esi, var+8
381; CHECK32-NEXT:    movl %edx, var+12
382; CHECK32-NEXT:    movl %eax, var
383; CHECK32-NEXT:    movl %ecx, var+4
384; CHECK32-NEXT:    addl $24, %esp
385; CHECK32-NEXT:    .cfi_def_cfa_offset 8
386; CHECK32-NEXT:    popl %esi
387; CHECK32-NEXT:    .cfi_def_cfa_offset 4
388; CHECK32-NEXT:    retl
389  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
390  store i128 %val, i128* @var, align 16
391  ret void
392}
393
; atomicrmw min (seq_cst) of %bits against the i128 at %p; the value returned
; by the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop; the new value is picked
; with a cmpq/sbbq 128-bit compare followed by cmovgeq (signed condition).
; i386 expectation: a call to __sync_fetch_and_min_16.
394define void @fetch_and_min(i128* %p, i128 %bits) {
395; CHECK-LABEL: fetch_and_min:
396; CHECK:       ## %bb.0:
397; CHECK-NEXT:    pushq %rbx
398; CHECK-NEXT:    .cfi_def_cfa_offset 16
399; CHECK-NEXT:    .cfi_offset %rbx, -16
400; CHECK-NEXT:    movq %rdx, %r8
401; CHECK-NEXT:    movq (%rdi), %rax
402; CHECK-NEXT:    movq 8(%rdi), %rdx
403; CHECK-NEXT:    .p2align 4, 0x90
404; CHECK-NEXT:  LBB6_1: ## %atomicrmw.start
405; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
406; CHECK-NEXT:    cmpq %rax, %rsi
407; CHECK-NEXT:    movq %r8, %rcx
408; CHECK-NEXT:    sbbq %rdx, %rcx
409; CHECK-NEXT:    movq %r8, %rcx
410; CHECK-NEXT:    cmovgeq %rdx, %rcx
411; CHECK-NEXT:    movq %rsi, %rbx
412; CHECK-NEXT:    cmovgeq %rax, %rbx
413; CHECK-NEXT:    lock cmpxchg16b (%rdi)
414; CHECK-NEXT:    jne LBB6_1
415; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
416; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
417; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
418; CHECK-NEXT:    popq %rbx
419; CHECK-NEXT:    retq
420;
421; CHECK32-LABEL: fetch_and_min:
422; CHECK32:       # %bb.0:
423; CHECK32-NEXT:    pushl %esi
424; CHECK32-NEXT:    .cfi_def_cfa_offset 8
425; CHECK32-NEXT:    subl $24, %esp
426; CHECK32-NEXT:    .cfi_def_cfa_offset 32
427; CHECK32-NEXT:    .cfi_offset %esi, -8
428; CHECK32-NEXT:    subl $8, %esp
429; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
430; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
431; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
432; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
433; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
434; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
435; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
436; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
437; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
438; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
439; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
440; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
441; CHECK32-NEXT:    pushl %eax
442; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
443; CHECK32-NEXT:    calll __sync_fetch_and_min_16
444; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
445; CHECK32-NEXT:    addl $28, %esp
446; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
447; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
448; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
449; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
450; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
451; CHECK32-NEXT:    movl %esi, var+8
452; CHECK32-NEXT:    movl %edx, var+12
453; CHECK32-NEXT:    movl %eax, var
454; CHECK32-NEXT:    movl %ecx, var+4
455; CHECK32-NEXT:    addl $24, %esp
456; CHECK32-NEXT:    .cfi_def_cfa_offset 8
457; CHECK32-NEXT:    popl %esi
458; CHECK32-NEXT:    .cfi_def_cfa_offset 4
459; CHECK32-NEXT:    retl
460  %val = atomicrmw min i128* %p, i128 %bits seq_cst
461  store i128 %val, i128* @var, align 16
462  ret void
463}
464
; atomicrmw max (seq_cst) of %bits against the i128 at %p; the value returned
; by the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop; the new value is picked
; with a cmpq/sbbq 128-bit compare followed by cmovlq (signed condition).
; i386 expectation: a call to __sync_fetch_and_max_16.
465define void @fetch_and_max(i128* %p, i128 %bits) {
466; CHECK-LABEL: fetch_and_max:
467; CHECK:       ## %bb.0:
468; CHECK-NEXT:    pushq %rbx
469; CHECK-NEXT:    .cfi_def_cfa_offset 16
470; CHECK-NEXT:    .cfi_offset %rbx, -16
471; CHECK-NEXT:    movq %rdx, %r8
472; CHECK-NEXT:    movq (%rdi), %rax
473; CHECK-NEXT:    movq 8(%rdi), %rdx
474; CHECK-NEXT:    .p2align 4, 0x90
475; CHECK-NEXT:  LBB7_1: ## %atomicrmw.start
476; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
477; CHECK-NEXT:    cmpq %rax, %rsi
478; CHECK-NEXT:    movq %r8, %rcx
479; CHECK-NEXT:    sbbq %rdx, %rcx
480; CHECK-NEXT:    movq %r8, %rcx
481; CHECK-NEXT:    cmovlq %rdx, %rcx
482; CHECK-NEXT:    movq %rsi, %rbx
483; CHECK-NEXT:    cmovlq %rax, %rbx
484; CHECK-NEXT:    lock cmpxchg16b (%rdi)
485; CHECK-NEXT:    jne LBB7_1
486; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
487; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
488; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
489; CHECK-NEXT:    popq %rbx
490; CHECK-NEXT:    retq
491;
492; CHECK32-LABEL: fetch_and_max:
493; CHECK32:       # %bb.0:
494; CHECK32-NEXT:    pushl %esi
495; CHECK32-NEXT:    .cfi_def_cfa_offset 8
496; CHECK32-NEXT:    subl $24, %esp
497; CHECK32-NEXT:    .cfi_def_cfa_offset 32
498; CHECK32-NEXT:    .cfi_offset %esi, -8
499; CHECK32-NEXT:    subl $8, %esp
500; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
501; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
502; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
503; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
504; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
505; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
506; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
507; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
508; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
509; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
510; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
511; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
512; CHECK32-NEXT:    pushl %eax
513; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
514; CHECK32-NEXT:    calll __sync_fetch_and_max_16
515; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
516; CHECK32-NEXT:    addl $28, %esp
517; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
518; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
519; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
520; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
521; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
522; CHECK32-NEXT:    movl %esi, var+8
523; CHECK32-NEXT:    movl %edx, var+12
524; CHECK32-NEXT:    movl %eax, var
525; CHECK32-NEXT:    movl %ecx, var+4
526; CHECK32-NEXT:    addl $24, %esp
527; CHECK32-NEXT:    .cfi_def_cfa_offset 8
528; CHECK32-NEXT:    popl %esi
529; CHECK32-NEXT:    .cfi_def_cfa_offset 4
530; CHECK32-NEXT:    retl
531  %val = atomicrmw max i128* %p, i128 %bits seq_cst
532  store i128 %val, i128* @var, align 16
533  ret void
534}
535
; atomicrmw umin (seq_cst) of %bits against the i128 at %p; the value returned
; by the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop; the new value is picked
; with a cmpq/sbbq 128-bit compare followed by cmovaeq (unsigned condition).
; i386 expectation: a call to __sync_fetch_and_umin_16.
536define void @fetch_and_umin(i128* %p, i128 %bits) {
537; CHECK-LABEL: fetch_and_umin:
538; CHECK:       ## %bb.0:
539; CHECK-NEXT:    pushq %rbx
540; CHECK-NEXT:    .cfi_def_cfa_offset 16
541; CHECK-NEXT:    .cfi_offset %rbx, -16
542; CHECK-NEXT:    movq %rdx, %r8
543; CHECK-NEXT:    movq (%rdi), %rax
544; CHECK-NEXT:    movq 8(%rdi), %rdx
545; CHECK-NEXT:    .p2align 4, 0x90
546; CHECK-NEXT:  LBB8_1: ## %atomicrmw.start
547; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
548; CHECK-NEXT:    cmpq %rax, %rsi
549; CHECK-NEXT:    movq %r8, %rcx
550; CHECK-NEXT:    sbbq %rdx, %rcx
551; CHECK-NEXT:    movq %r8, %rcx
552; CHECK-NEXT:    cmovaeq %rdx, %rcx
553; CHECK-NEXT:    movq %rsi, %rbx
554; CHECK-NEXT:    cmovaeq %rax, %rbx
555; CHECK-NEXT:    lock cmpxchg16b (%rdi)
556; CHECK-NEXT:    jne LBB8_1
557; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
558; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
559; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
560; CHECK-NEXT:    popq %rbx
561; CHECK-NEXT:    retq
562;
563; CHECK32-LABEL: fetch_and_umin:
564; CHECK32:       # %bb.0:
565; CHECK32-NEXT:    pushl %esi
566; CHECK32-NEXT:    .cfi_def_cfa_offset 8
567; CHECK32-NEXT:    subl $24, %esp
568; CHECK32-NEXT:    .cfi_def_cfa_offset 32
569; CHECK32-NEXT:    .cfi_offset %esi, -8
570; CHECK32-NEXT:    subl $8, %esp
571; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
572; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
573; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
574; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
575; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
576; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
577; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
578; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
579; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
580; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
581; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
582; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
583; CHECK32-NEXT:    pushl %eax
584; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
585; CHECK32-NEXT:    calll __sync_fetch_and_umin_16
586; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
587; CHECK32-NEXT:    addl $28, %esp
588; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
589; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
590; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
591; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
592; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
593; CHECK32-NEXT:    movl %esi, var+8
594; CHECK32-NEXT:    movl %edx, var+12
595; CHECK32-NEXT:    movl %eax, var
596; CHECK32-NEXT:    movl %ecx, var+4
597; CHECK32-NEXT:    addl $24, %esp
598; CHECK32-NEXT:    .cfi_def_cfa_offset 8
599; CHECK32-NEXT:    popl %esi
600; CHECK32-NEXT:    .cfi_def_cfa_offset 4
601; CHECK32-NEXT:    retl
602  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
603  store i128 %val, i128* @var, align 16
604  ret void
605}
606
; atomicrmw umax (seq_cst) of %bits against the i128 at %p; the value returned
; by the atomicrmw is stored to @var.
; x86-64 expectation: a `lock cmpxchg16b` retry loop; the new value is picked
; with a cmpq/sbbq 128-bit compare followed by cmovbq (unsigned condition).
; i386 expectation: a call to __sync_fetch_and_umax_16.
607define void @fetch_and_umax(i128* %p, i128 %bits) {
608; CHECK-LABEL: fetch_and_umax:
609; CHECK:       ## %bb.0:
610; CHECK-NEXT:    pushq %rbx
611; CHECK-NEXT:    .cfi_def_cfa_offset 16
612; CHECK-NEXT:    .cfi_offset %rbx, -16
613; CHECK-NEXT:    movq %rdx, %r8
614; CHECK-NEXT:    movq (%rdi), %rax
615; CHECK-NEXT:    movq 8(%rdi), %rdx
616; CHECK-NEXT:    .p2align 4, 0x90
617; CHECK-NEXT:  LBB9_1: ## %atomicrmw.start
618; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
619; CHECK-NEXT:    cmpq %rax, %rsi
620; CHECK-NEXT:    movq %r8, %rcx
621; CHECK-NEXT:    sbbq %rdx, %rcx
622; CHECK-NEXT:    movq %r8, %rcx
623; CHECK-NEXT:    cmovbq %rdx, %rcx
624; CHECK-NEXT:    movq %rsi, %rbx
625; CHECK-NEXT:    cmovbq %rax, %rbx
626; CHECK-NEXT:    lock cmpxchg16b (%rdi)
627; CHECK-NEXT:    jne LBB9_1
628; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
629; CHECK-NEXT:    movq %rax, {{.*}}(%rip)
630; CHECK-NEXT:    movq %rdx, _var+{{.*}}(%rip)
631; CHECK-NEXT:    popq %rbx
632; CHECK-NEXT:    retq
633;
634; CHECK32-LABEL: fetch_and_umax:
635; CHECK32:       # %bb.0:
636; CHECK32-NEXT:    pushl %esi
637; CHECK32-NEXT:    .cfi_def_cfa_offset 8
638; CHECK32-NEXT:    subl $24, %esp
639; CHECK32-NEXT:    .cfi_def_cfa_offset 32
640; CHECK32-NEXT:    .cfi_offset %esi, -8
641; CHECK32-NEXT:    subl $8, %esp
642; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
643; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
644; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
645; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
646; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
647; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
648; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
649; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
650; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
651; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
652; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
653; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
654; CHECK32-NEXT:    pushl %eax
655; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
656; CHECK32-NEXT:    calll __sync_fetch_and_umax_16
657; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
658; CHECK32-NEXT:    addl $28, %esp
659; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
660; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
661; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
662; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
663; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
664; CHECK32-NEXT:    movl %esi, var+8
665; CHECK32-NEXT:    movl %edx, var+12
666; CHECK32-NEXT:    movl %eax, var
667; CHECK32-NEXT:    movl %ecx, var+4
668; CHECK32-NEXT:    addl $24, %esp
669; CHECK32-NEXT:    .cfi_def_cfa_offset 8
670; CHECK32-NEXT:    popl %esi
671; CHECK32-NEXT:    .cfi_def_cfa_offset 4
672; CHECK32-NEXT:    retl
673  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
674  store i128 %val, i128* @var, align 16
675  ret void
676}
677
; Sequentially consistent, 16-byte-aligned atomic load of the i128 at %p,
; returned to the caller.
; x86-64 expectation: rax, rdx, rcx and rbx are zeroed with xorl and a single
; `lock cmpxchg16b (%rdi)` performs the load.
; i386 expectation: a call to __sync_val_compare_and_swap_16 with eight zero
; immediates pushed as arguments.
678define i128 @atomic_load_seq_cst(i128* %p) {
679; CHECK-LABEL: atomic_load_seq_cst:
680; CHECK:       ## %bb.0:
681; CHECK-NEXT:    pushq %rbx
682; CHECK-NEXT:    .cfi_def_cfa_offset 16
683; CHECK-NEXT:    .cfi_offset %rbx, -16
684; CHECK-NEXT:    xorl %eax, %eax
685; CHECK-NEXT:    xorl %edx, %edx
686; CHECK-NEXT:    xorl %ecx, %ecx
687; CHECK-NEXT:    xorl %ebx, %ebx
688; CHECK-NEXT:    lock cmpxchg16b (%rdi)
689; CHECK-NEXT:    popq %rbx
690; CHECK-NEXT:    retq
691;
692; CHECK32-LABEL: atomic_load_seq_cst:
693; CHECK32:       # %bb.0:
694; CHECK32-NEXT:    pushl %edi
695; CHECK32-NEXT:    .cfi_def_cfa_offset 8
696; CHECK32-NEXT:    pushl %esi
697; CHECK32-NEXT:    .cfi_def_cfa_offset 12
698; CHECK32-NEXT:    subl $20, %esp
699; CHECK32-NEXT:    .cfi_def_cfa_offset 32
700; CHECK32-NEXT:    .cfi_offset %esi, -12
701; CHECK32-NEXT:    .cfi_offset %edi, -8
702; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
703; CHECK32-NEXT:    subl $8, %esp
704; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
705; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
706; CHECK32-NEXT:    pushl $0
707; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
708; CHECK32-NEXT:    pushl $0
709; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
710; CHECK32-NEXT:    pushl $0
711; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
712; CHECK32-NEXT:    pushl $0
713; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
714; CHECK32-NEXT:    pushl $0
715; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
716; CHECK32-NEXT:    pushl $0
717; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
718; CHECK32-NEXT:    pushl $0
719; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
720; CHECK32-NEXT:    pushl $0
721; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
722; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
723; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
724; CHECK32-NEXT:    pushl %eax
725; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
726; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
727; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
728; CHECK32-NEXT:    addl $44, %esp
729; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
730; CHECK32-NEXT:    movl (%esp), %eax
731; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
732; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
733; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
734; CHECK32-NEXT:    movl %edi, 8(%esi)
735; CHECK32-NEXT:    movl %edx, 12(%esi)
736; CHECK32-NEXT:    movl %eax, (%esi)
737; CHECK32-NEXT:    movl %ecx, 4(%esi)
738; CHECK32-NEXT:    movl %esi, %eax
739; CHECK32-NEXT:    addl $20, %esp
740; CHECK32-NEXT:    .cfi_def_cfa_offset 12
741; CHECK32-NEXT:    popl %esi
742; CHECK32-NEXT:    .cfi_def_cfa_offset 8
743; CHECK32-NEXT:    popl %edi
744; CHECK32-NEXT:    .cfi_def_cfa_offset 4
745; CHECK32-NEXT:    retl $4
746   %r = load atomic i128, i128* %p seq_cst, align 16
747   ret i128 %r
748}
749
; Monotonic (relaxed), 16-byte-aligned atomic load of the i128 at %p,
; returned to the caller.
; x86-64 expectation: rax, rdx, rcx and rbx are zeroed with xorl and a single
; `lock cmpxchg16b (%rdi)` performs the load.
; i386 expectation: a call to __sync_val_compare_and_swap_16 with eight zero
; immediates pushed as arguments.
750define i128 @atomic_load_relaxed(i128* %p) {
751; CHECK-LABEL: atomic_load_relaxed:
752; CHECK:       ## %bb.0:
753; CHECK-NEXT:    pushq %rbx
754; CHECK-NEXT:    .cfi_def_cfa_offset 16
755; CHECK-NEXT:    .cfi_offset %rbx, -16
756; CHECK-NEXT:    xorl %eax, %eax
757; CHECK-NEXT:    xorl %edx, %edx
758; CHECK-NEXT:    xorl %ecx, %ecx
759; CHECK-NEXT:    xorl %ebx, %ebx
760; CHECK-NEXT:    lock cmpxchg16b (%rdi)
761; CHECK-NEXT:    popq %rbx
762; CHECK-NEXT:    retq
763;
764; CHECK32-LABEL: atomic_load_relaxed:
765; CHECK32:       # %bb.0:
766; CHECK32-NEXT:    pushl %edi
767; CHECK32-NEXT:    .cfi_def_cfa_offset 8
768; CHECK32-NEXT:    pushl %esi
769; CHECK32-NEXT:    .cfi_def_cfa_offset 12
770; CHECK32-NEXT:    subl $20, %esp
771; CHECK32-NEXT:    .cfi_def_cfa_offset 32
772; CHECK32-NEXT:    .cfi_offset %esi, -12
773; CHECK32-NEXT:    .cfi_offset %edi, -8
774; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
775; CHECK32-NEXT:    subl $8, %esp
776; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
777; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
778; CHECK32-NEXT:    pushl $0
779; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
780; CHECK32-NEXT:    pushl $0
781; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
782; CHECK32-NEXT:    pushl $0
783; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
784; CHECK32-NEXT:    pushl $0
785; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
786; CHECK32-NEXT:    pushl $0
787; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
788; CHECK32-NEXT:    pushl $0
789; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
790; CHECK32-NEXT:    pushl $0
791; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
792; CHECK32-NEXT:    pushl $0
793; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
794; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
795; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
796; CHECK32-NEXT:    pushl %eax
797; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
798; CHECK32-NEXT:    calll __sync_val_compare_and_swap_16
799; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
800; CHECK32-NEXT:    addl $44, %esp
801; CHECK32-NEXT:    .cfi_adjust_cfa_offset -44
802; CHECK32-NEXT:    movl (%esp), %eax
803; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
804; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
805; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi
806; CHECK32-NEXT:    movl %edi, 8(%esi)
807; CHECK32-NEXT:    movl %edx, 12(%esi)
808; CHECK32-NEXT:    movl %eax, (%esi)
809; CHECK32-NEXT:    movl %ecx, 4(%esi)
810; CHECK32-NEXT:    movl %esi, %eax
811; CHECK32-NEXT:    addl $20, %esp
812; CHECK32-NEXT:    .cfi_def_cfa_offset 12
813; CHECK32-NEXT:    popl %esi
814; CHECK32-NEXT:    .cfi_def_cfa_offset 8
815; CHECK32-NEXT:    popl %edi
816; CHECK32-NEXT:    .cfi_def_cfa_offset 4
817; CHECK32-NEXT:    retl $4
818   %r = load atomic i128, i128* %p monotonic, align 16
819   ret i128 %r
820}
821
; Atomic 16-byte-aligned store of the i128 %in to %p with seq_cst ordering.
; The x86_64 assertions expect the store to be emitted as a retry loop around
; lock cmpxchg16b: the current memory contents are loaded into rax/rdx, the new
; value is placed in rbx/rcx, and the loop repeats until the exchange succeeds.
; The i386 assertions expect a call to the __sync_lock_test_and_set_16 libcall.
822define void @atomic_store_seq_cst(i128* %p, i128 %in) {
823; CHECK-LABEL: atomic_store_seq_cst:
824; CHECK:       ## %bb.0:
825; CHECK-NEXT:    pushq %rbx
826; CHECK-NEXT:    .cfi_def_cfa_offset 16
827; CHECK-NEXT:    .cfi_offset %rbx, -16
828; CHECK-NEXT:    movq %rdx, %rcx
829; CHECK-NEXT:    movq %rsi, %rbx
830; CHECK-NEXT:    movq (%rdi), %rax
831; CHECK-NEXT:    movq 8(%rdi), %rdx
832; CHECK-NEXT:    .p2align 4, 0x90
833; CHECK-NEXT:  LBB12_1: ## %atomicrmw.start
834; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
835; CHECK-NEXT:    lock cmpxchg16b (%rdi)
836; CHECK-NEXT:    jne LBB12_1
837; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
838; CHECK-NEXT:    popq %rbx
839; CHECK-NEXT:    retq
840;
841; CHECK32-LABEL: atomic_store_seq_cst:
842; CHECK32:       # %bb.0:
843; CHECK32-NEXT:    subl $36, %esp
844; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
845; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
846; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
847; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
848; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
849; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
850; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
851; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
852; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
853; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
854; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
855; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
856; CHECK32-NEXT:    pushl %eax
857; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
858; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
859; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
860; CHECK32-NEXT:    addl $56, %esp
861; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
862; CHECK32-NEXT:    retl
863   store atomic i128 %in, i128* %p seq_cst, align 16
864   ret void
865}
866
; Atomic 16-byte-aligned store of the i128 %in to %p with release ordering.
; The x86_64 assertions expect a lock cmpxchg16b retry loop (new value in
; rbx/rcx, previously loaded memory contents in rax/rdx, looping on jne until
; the exchange succeeds). The i386 assertions expect a call to the
; __sync_lock_test_and_set_16 libcall.
867define void @atomic_store_release(i128* %p, i128 %in) {
868; CHECK-LABEL: atomic_store_release:
869; CHECK:       ## %bb.0:
870; CHECK-NEXT:    pushq %rbx
871; CHECK-NEXT:    .cfi_def_cfa_offset 16
872; CHECK-NEXT:    .cfi_offset %rbx, -16
873; CHECK-NEXT:    movq %rdx, %rcx
874; CHECK-NEXT:    movq %rsi, %rbx
875; CHECK-NEXT:    movq (%rdi), %rax
876; CHECK-NEXT:    movq 8(%rdi), %rdx
877; CHECK-NEXT:    .p2align 4, 0x90
878; CHECK-NEXT:  LBB13_1: ## %atomicrmw.start
879; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
880; CHECK-NEXT:    lock cmpxchg16b (%rdi)
881; CHECK-NEXT:    jne LBB13_1
882; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
883; CHECK-NEXT:    popq %rbx
884; CHECK-NEXT:    retq
885;
886; CHECK32-LABEL: atomic_store_release:
887; CHECK32:       # %bb.0:
888; CHECK32-NEXT:    subl $36, %esp
889; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
890; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
891; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
892; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
893; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
894; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
895; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
896; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
897; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
898; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
899; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
900; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
901; CHECK32-NEXT:    pushl %eax
902; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
903; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
904; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
905; CHECK32-NEXT:    addl $56, %esp
906; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
907; CHECK32-NEXT:    retl
908   store atomic i128 %in, i128* %p release, align 16
909   ret void
910}
911
; Atomic 16-byte-aligned store of the i128 %in to %p with the weakest atomic
; ordering. The x86_64 assertions expect the same lock cmpxchg16b retry loop
; as the stronger-ordered stores in this file; the i386 assertions expect a
; call to the __sync_lock_test_and_set_16 libcall.
; NOTE(review): the store below uses `unordered`, not `monotonic`, although the
; function is named "relaxed" and atomic_load_relaxed uses `monotonic` --
; confirm this is intentional before changing it, since the assertions are
; autogenerated from the current IR.
912define void @atomic_store_relaxed(i128* %p, i128 %in) {
913; CHECK-LABEL: atomic_store_relaxed:
914; CHECK:       ## %bb.0:
915; CHECK-NEXT:    pushq %rbx
916; CHECK-NEXT:    .cfi_def_cfa_offset 16
917; CHECK-NEXT:    .cfi_offset %rbx, -16
918; CHECK-NEXT:    movq %rdx, %rcx
919; CHECK-NEXT:    movq %rsi, %rbx
920; CHECK-NEXT:    movq (%rdi), %rax
921; CHECK-NEXT:    movq 8(%rdi), %rdx
922; CHECK-NEXT:    .p2align 4, 0x90
923; CHECK-NEXT:  LBB14_1: ## %atomicrmw.start
924; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
925; CHECK-NEXT:    lock cmpxchg16b (%rdi)
926; CHECK-NEXT:    jne LBB14_1
927; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
928; CHECK-NEXT:    popq %rbx
929; CHECK-NEXT:    retq
930;
931; CHECK32-LABEL: atomic_store_relaxed:
932; CHECK32:       # %bb.0:
933; CHECK32-NEXT:    subl $36, %esp
934; CHECK32-NEXT:    .cfi_adjust_cfa_offset 36
935; CHECK32-NEXT:    leal {{[0-9]+}}(%esp), %eax
936; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
937; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
938; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
939; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
940; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
941; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
942; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
943; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
944; CHECK32-NEXT:    pushl {{[0-9]+}}(%esp)
945; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
946; CHECK32-NEXT:    pushl %eax
947; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
948; CHECK32-NEXT:    calll __sync_lock_test_and_set_16
949; CHECK32-NEXT:    .cfi_adjust_cfa_offset -4
950; CHECK32-NEXT:    addl $56, %esp
951; CHECK32-NEXT:    .cfi_adjust_cfa_offset -56
952; CHECK32-NEXT:    retl
953   store atomic i128 %in, i128* %p unordered, align 16
954   ret void
955}
956
957
958