; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX

; ----- FADD -----

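; Floating-point add of an argument value to a location passed by pointer.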
define void @fadd_32r(float* %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fadd_32r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    fadds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%eax)
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%eax)
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss (%eax), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addss (%rdi), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %floc = bitcast float* %loc to i32*
  %1 = load atomic i32, i32* %floc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %add = fadd float %2, %val
  %3 = bitcast float %add to i32
  store atomic i32 %3, i32* %floc release, align 4
  ret void
}

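; Same as above, but with a 64-bit (double) value and location.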
define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fadd_64r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 8(%ebp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 8(%ebp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 8(%ebp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addsd (%rdi), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %floc = bitcast double* %loc to i64*
  %1 = load atomic i64, i64* %floc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, i64* %floc release, align 8
  ret void
}

@glob32 = global float 0.000000e+00, align 4
@glob64 = global double 0.000000e+00, align 8

; Floating-point add to a global using an immediate.
define void @fadd_32g() nounwind {
; X86-NOSSE-LABEL: fadd_32g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl glob32, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, glob32
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl glob32, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.LCPI.*}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, glob32
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss glob32, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, glob32
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, glob32
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    movss %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
  ret void
}

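; Same as fadd_32g, but with a 64-bit (double) global.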
define void @fadd_64g() nounwind {
; X86-NOSSE-LABEL: fadd_64g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll glob64
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll glob64
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, glob64
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, glob64
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, glob64
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, {{.*}}(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
  ret void
}

; Floating-point add to a hard-coded memory address using an immediate.
define void @fadd_32imm() nounwind {
; X86-NOSSE-LABEL: fadd_32imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl -559038737, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, -559038737
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl -559038737, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.LCPI.*}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, -559038737
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss -559038737, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, -559038737
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, -559038737
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss (%rax), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
  ret void
}

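; Same as fadd_32imm, but with a 64-bit (double) location.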
define void @fadd_64imm() nounwind {
; X86-NOSSE-LABEL: fadd_64imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll -559038737
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll -559038737
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, -559038737
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, -559038737
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd (%rax), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
  ret void
}

; Floating-point add to a stack location.
define void @fadd_32stack() nounwind {
; X86-NOSSE-LABEL: fadd_32stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.LCPI.*}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss (%esp), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i32, align 4
  %bc3 = bitcast i32* %ptr to float*
  %load = load atomic i32, i32* %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, i32* %ptr release, align 4
  ret void
}

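; Same as fadd_32stack, but with a 64-bit (double) stack slot.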
define void @fadd_64stack() nounwind {
; X86-NOSSE-LABEL: fadd_64stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $24, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $16, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i64, align 8
  %bc3 = bitcast i64* %ptr to double*
  %load = load atomic i64, i64* %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, i64* %ptr release, align 8
  ret void
}

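; Floating-point add to an i64 array element addressed by a base pointer plus index.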
define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-LABEL: fadd_array:
; X86-NOSSE:       # %bb.0: # %bb
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %edx, (%esp)
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_array:
; X86-SSE1:       # %bb.0: # %bb
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 20(%ebp), %eax
; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_array:
; X86-SSE2:       # %bb.0: # %bb
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 20(%ebp), %eax
; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_array:
; X86-AVX:       # %bb.0: # %bb
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 20(%ebp), %eax
; X86-AVX-NEXT:    movl 8(%ebp), %ecx
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_array:
; X64-SSE:       # %bb.0: # %bb
; X64-SSE-NEXT:    addsd (%rdi,%rsi,8), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi,%rsi,8)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_array:
; X64-AVX:       # %bb.0: # %bb
; X64-AVX-NEXT:    vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT:    retq
bb:
  %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2
  %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fadd double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8
  ret void
}