; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,-sse | FileCheck %s --check-prefix=X64

; <2 x double> fadd from/to memory; checks expect scalarized x87 lowering
; (fldl/faddl/fstpl per lane), i.e. no SSE registers are used.
define void @fadd_2f64_mem(<2 x double>* %p0, <2 x double>* %p1, <2 x double>* %p2) nounwind {
; X32-LABEL: fadd_2f64_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    fldl 8(%edx)
; X32-NEXT:    fldl (%edx)
; X32-NEXT:    faddl (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    faddl 8(%ecx)
; X32-NEXT:    fstpl 8(%eax)
; X32-NEXT:    fstpl (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fadd_2f64_mem:
; X64:       # %bb.0:
; X64-NEXT:    fldl 8(%rdi)
; X64-NEXT:    fldl (%rdi)
; X64-NEXT:    faddl (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    faddl 8(%rsi)
; X64-NEXT:    fstpl 8(%rdx)
; X64-NEXT:    fstpl (%rdx)
; X64-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %p0
  %2 = load <2 x double>, <2 x double>* %p1
  %3 = fadd <2 x double> %1, %2
  store <2 x double> %3, <2 x double>* %p2
  ret void
}

; <4 x float> fadd from/to memory; checks expect four scalar x87
; flds/fadds/fstps sequences with fxch shuffles of the FP stack.
define void @fadd_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
; X32-LABEL: fadd_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    flds 12(%edx)
; X32-NEXT:    flds 8(%edx)
; X32-NEXT:    flds 4(%edx)
; X32-NEXT:    flds (%edx)
; X32-NEXT:    fadds (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    fadds 4(%ecx)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fadds 8(%ecx)
; X32-NEXT:    fxch %st(3)
; X32-NEXT:    fadds 12(%ecx)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fadd_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    flds 12(%rdi)
; X64-NEXT:    flds 8(%rdi)
; X64-NEXT:    flds 4(%rdi)
; X64-NEXT:    flds (%rdi)
; X64-NEXT:    fadds (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fadds 4(%rsi)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fadds 8(%rsi)
; X64-NEXT:    fxch %st(3)
; X64-NEXT:    fadds 12(%rsi)
; X64-NEXT:    fstps 12(%rdx)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fstps 8(%rdx)
; X64-NEXT:    fstps 4(%rdx)
; X64-NEXT:    fstps (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = load <4 x float>, <4 x float>* %p1
  %3 = fadd <4 x float> %1, %2
  store <4 x float> %3, <4 x float>* %p2
  ret void
}

; <4 x float> fdiv from/to memory; same scalarized x87 shape as the fadd
; test above but with fdivs (operand order matters for division).
define void @fdiv_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
; X32-LABEL: fdiv_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    flds 12(%edx)
; X32-NEXT:    flds 8(%edx)
; X32-NEXT:    flds 4(%edx)
; X32-NEXT:    flds (%edx)
; X32-NEXT:    fdivs (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    fdivs 4(%ecx)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fdivs 8(%ecx)
; X32-NEXT:    fxch %st(3)
; X32-NEXT:    fdivs 12(%ecx)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fdiv_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    flds 12(%rdi)
; X64-NEXT:    flds 8(%rdi)
; X64-NEXT:    flds 4(%rdi)
; X64-NEXT:    flds (%rdi)
; X64-NEXT:    fdivs (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fdivs 4(%rsi)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fdivs 8(%rsi)
; X64-NEXT:    fxch %st(3)
; X64-NEXT:    fdivs 12(%rsi)
; X64-NEXT:    fstps 12(%rdx)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fstps 8(%rdx)
; X64-NEXT:    fstps 4(%rdx)
; X64-NEXT:    fstps (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = load <4 x float>, <4 x float>* %p1
  %3 = fdiv <4 x float> %1, %2
  store <4 x float> %3, <4 x float>* %p2
  ret void
}

; <4 x i64> -> <4 x float> sitofp; checks expect the i64 lanes to be
; spilled to stack slots and converted via x87 fildll (64-bit int load).
define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind {
; X32-LABEL: sitofp_4i64_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 24(%eax), %ecx
; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT:    movl 28(%eax), %ecx
; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT:    movl 16(%eax), %esi
; X32-NEXT:    movl 20(%eax), %edi
; X32-NEXT:    movl 8(%eax), %ebx
; X32-NEXT:    movl 12(%eax), %edx
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl (%esp), %eax # 4-byte Reload
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    leal -12(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: sitofp_4i64_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq 24(%rdi), %rax
; X64-NEXT:    movq 16(%rdi), %rcx
; X64-NEXT:    movq (%rdi), %rdx
; X64-NEXT:    movq 8(%rdi), %rdi
; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fstps 12(%rsi)
; X64-NEXT:    fstps 8(%rsi)
; X64-NEXT:    fstps 4(%rsi)
; X64-NEXT:    fstps (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i64>, <4 x i64>* %p0
  %2 = sitofp <4 x i64> %1 to <4 x float>
  store <4 x float> %2, <4 x float>* %p1
  ret void
}

; <4 x i32> -> <4 x float> sitofp; checks expect each lane to round-trip
; through a stack slot and convert via x87 fildl (32-bit int load).
define void @sitofp_4i32_4f32_mem(<4 x i32>* %p0, <4 x float>* %p1) nounwind {
; X32-LABEL: sitofp_4i32_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl 12(%ecx), %edx
; X32-NEXT:    movl 8(%ecx), %esi
; X32-NEXT:    movl (%ecx), %edi
; X32-NEXT:    movl 4(%ecx), %ecx
; X32-NEXT:    movl %edi, (%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildl (%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    addl $16, %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; X64-LABEL: sitofp_4i32_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl 12(%rdi), %eax
; X64-NEXT:    movl 8(%rdi), %ecx
; X64-NEXT:    movl (%rdi), %edx
; X64-NEXT:    movl 4(%rdi), %edi
; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fstps 12(%rsi)
; X64-NEXT:    fstps 8(%rsi)
; X64-NEXT:    fstps 4(%rsi)
; X64-NEXT:    fstps (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = sitofp <4 x i32> %1 to <4 x float>
  store <4 x float> %2, <4 x float>* %p1
  ret void
}

; <2 x i64> add; checks expect scalar GPR lowering: addq pairs on x86-64,
; addl/adcl carry chains per 64-bit lane on i686.
define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwind {
; X32-LABEL: add_2i64_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl 12(%edx), %esi
; X32-NEXT:    movl 8(%edx), %edi
; X32-NEXT:    movl (%edx), %ebx
; X32-NEXT:    movl 4(%edx), %edx
; X32-NEXT:    addl (%ecx), %ebx
; X32-NEXT:    adcl 4(%ecx), %edx
; X32-NEXT:    addl 8(%ecx), %edi
; X32-NEXT:    adcl 12(%ecx), %esi
; X32-NEXT:    movl %edi, 8(%eax)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    movl %esi, 12(%eax)
; X32-NEXT:    movl %edx, 4(%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: add_2i64_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq 8(%rdi), %rcx
; X64-NEXT:    addq (%rsi), %rax
; X64-NEXT:    addq 8(%rsi), %rcx
; X64-NEXT:    movq %rcx, 8(%rdx)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %p0
  %2 = load <2 x i64>, <2 x i64>* %p1
  %3 = add <2 x i64> %1, %2
  store <2 x i64> %3, <2 x i64>* %p2
  ret void
}

; <4 x i32> add; checks expect four independent scalar addl operations
; (no vector registers used on either target).
define void @add_4i32_mem(<4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
; X32-LABEL: add_4i32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl 12(%edx), %esi
; X32-NEXT:    movl 8(%edx), %edi
; X32-NEXT:    movl (%edx), %ebx
; X32-NEXT:    movl 4(%edx), %edx
; X32-NEXT:    addl (%ecx), %ebx
; X32-NEXT:    addl 4(%ecx), %edx
; X32-NEXT:    addl 8(%ecx), %edi
; X32-NEXT:    addl 12(%ecx), %esi
; X32-NEXT:    movl %esi, 12(%eax)
; X32-NEXT:    movl %edi, 8(%eax)
; X32-NEXT:    movl %edx, 4(%eax)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: add_4i32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl 12(%rdi), %eax
; X64-NEXT:    movl 8(%rdi), %ecx
; X64-NEXT:    movl (%rdi), %r8d
; X64-NEXT:    movl 4(%rdi), %edi
; X64-NEXT:    addl (%rsi), %r8d
; X64-NEXT:    addl 4(%rsi), %edi
; X64-NEXT:    addl 8(%rsi), %ecx
; X64-NEXT:    addl 12(%rsi), %eax
; X64-NEXT:    movl %eax, 12(%rdx)
; X64-NEXT:    movl %ecx, 8(%rdx)
; X64-NEXT:    movl %edi, 4(%rdx)
; X64-NEXT:    movl %r8d, (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = load <4 x i32>, <4 x i32>* %p1
  %3 = add <4 x i32> %1, %2
  store <4 x i32> %3, <4 x i32>* %p2
  ret void
}
