; RUN: llc < %s -mattr=+sse2      -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Darwin
; RUN: llc < %s -mattr=+sse2      -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Mingw32
; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1
; RUN: llc < %s -mattr=-sse       -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s                 -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
; RUN: llc < %s                 -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s -check-prefix=NHM_64


; Constant source buffers for the memcpy tests below.
; @.str is 25 bytes ("image" followed by NUL padding) with no explicit
; alignment; it is the source of the copy in @t1.
@.str = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"
; @.str2 is 30 bytes ('x' characters followed by a NUL terminator), declared
; with align 4; it is the source of the copy in @t4.
@.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4

; @t1 - copy the 25-byte constant @.str into a stack buffer through the
; memcpy intrinsic (length 25, alignment operand 1, not volatile). Each run
; configuration checks the store sequence it expects for this copy. The
; function ends in 'unreachable'; only the copy itself is under test.
define void @t1(i32 %argc, i8** %argv) nounwind  {
entry:
; i686 Darwin with SSE2 - bytes 16..23 go through movsd, the first 16 bytes
; through an aligned 16-byte move, and the final byte is stored as zero.
; SSE2-Darwin-LABEL: t1:
; SSE2-Darwin: movsd _.str+16, %xmm0
; SSE2-Darwin: movsd %xmm0, 16(%esp)
; SSE2-Darwin: movaps _.str, %xmm0
; SSE2-Darwin: movaps %xmm0
; SSE2-Darwin: movb $0, 24(%esp)

; i686 mingw32 with SSE2 - same as Darwin except that the 16-byte store is
; movups (presumably the stack slot is not known to be 16-byte aligned on
; this target; confirm against the target's stack alignment).
; SSE2-Mingw32-LABEL: t1:
; SSE2-Mingw32: movsd _.str+16, %xmm0
; SSE2-Mingw32: movsd %xmm0, 16(%esp)
; SSE2-Mingw32: movaps _.str, %xmm0
; SSE2-Mingw32: movups %xmm0
; SSE2-Mingw32: movb $0, 24(%esp)

; SSE without SSE2 - the first 16 bytes use movaps; the remaining zero bytes
; are written with integer immediate stores.
; SSE1-LABEL: t1:
; SSE1: movaps _.str, %xmm0
; SSE1: movaps %xmm0
; SSE1: movb $0, 24(%esp)
; SSE1: movl $0, 20(%esp)
; SSE1: movl $0, 16(%esp)

; No SSE - the whole constant is materialized as immediates.
; $1734438249 is 0x67616D69 (the bytes "imag" in little-endian order) and
; $101 is the character 'e'.
; NOSSE-LABEL: t1:
; NOSSE: movb $0
; NOSSE: movl $0
; NOSSE: movl $0
; NOSSE: movl $0
; NOSSE: movl $0
; NOSSE: movl $101
; NOSSE: movl $1734438249

; x86-64 (core2) - RIP-relative aligned load of the first 16 bytes, then
; zero stores for the tail.
; X86-64-LABEL: t1:
; X86-64: movaps _.str(%rip), %xmm0
; X86-64: movaps %xmm0
; X86-64: movb $0
; X86-64: movq $0
  %tmp1 = alloca [25 x i8]
  %tmp2 = bitcast [25 x i8]* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false)
  unreachable
}

;rdar://7774704
; Aggregate holding two doubles (16 bytes); @t2 and @t3 copy one of these
; from %b to %a.
%struct.s0 = type { [2 x double] }

; @t2 - copy 16 bytes from %b to %a with alignment operand 16.
; Every configuration with SSE expects a single aligned 16-byte load/store
; pair (movaps); the configuration without SSE expects 32-bit integer moves.
define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
entry:
; SSE2-Darwin-LABEL: t2:
; SSE2-Darwin: movaps (%ecx), %xmm0
; SSE2-Darwin: movaps %xmm0, (%eax)

; SSE2-Mingw32-LABEL: t2:
; SSE2-Mingw32: movaps (%ecx), %xmm0
; SSE2-Mingw32: movaps %xmm0, (%eax)

; SSE1-LABEL: t2:
; SSE1: movaps (%ecx), %xmm0
; SSE1: movaps %xmm0, (%eax)

; Without SSE only a run of ten movl instructions is checked; operands are
; deliberately left unconstrained.
; NOSSE-LABEL: t2:
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl

; On x86-64 the source pointer %b arrives in %rsi and the destination %a
; in %rdi.
; X86-64-LABEL: t2:
; X86-64: movaps (%rsi), %xmm0
; X86-64: movaps %xmm0, (%rdi)
  %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
  %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
  ret void
}

; @t3 - same 16-byte copy as @t2 (from %b to %a) but with alignment
; operand 8, so a single aligned 16-byte move is not expected. The checks
; expect two 8-byte movsd pairs with SSE2, two 8-byte movq pairs on x86-64,
; and 32-bit integer moves when SSE2 is unavailable.
define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
entry:
; SSE2-Darwin-LABEL: t3:
; SSE2-Darwin: movsd (%ecx), %xmm0
; SSE2-Darwin: movsd 8(%ecx), %xmm1
; SSE2-Darwin: movsd %xmm1, 8(%eax)
; SSE2-Darwin: movsd %xmm0, (%eax)

; SSE2-Mingw32-LABEL: t3:
; SSE2-Mingw32: movsd (%ecx), %xmm0
; SSE2-Mingw32: movsd 8(%ecx), %xmm1
; SSE2-Mingw32: movsd %xmm1, 8(%eax)
; SSE2-Mingw32: movsd %xmm0, (%eax)

; With SSE but no SSE2 the expectation is the same as with no SSE at all -
; a run of ten movl instructions.
; SSE1-LABEL: t3:
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl
; SSE1: movl

; NOSSE-LABEL: t3:
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl
; NOSSE: movl

; X86-64-LABEL: t3:
; X86-64: movq (%rsi), %rax
; X86-64: movq 8(%rsi), %rcx
; X86-64: movq %rcx, 8(%rdi)
; X86-64: movq %rax, (%rdi)
  %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
  %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8, i1 false)
  ret void
}

; @t4 - copy the 30-byte constant @.str2 into a stack buffer through the
; memcpy intrinsic (length 30, alignment operand 1, not volatile).
; All four 32-bit configurations expect the string to be materialized as
; immediates: $2021161080 is 0x78787878 (four 'x' bytes) and the 16-bit
; $120 is 'x' followed by the NUL terminator; 7 * 4 + 2 = 30 bytes.
; The function ends in 'unreachable'; only the copy itself is under test.
define void @t4() nounwind {
entry:
; SSE2-Darwin-LABEL: t4:
; SSE2-Darwin: movw $120
; SSE2-Darwin: movl $2021161080
; SSE2-Darwin: movl $2021161080
; SSE2-Darwin: movl $2021161080
; SSE2-Darwin: movl $2021161080
; SSE2-Darwin: movl $2021161080
; SSE2-Darwin: movl $2021161080
; SSE2-Darwin: movl $2021161080

; SSE2-Mingw32-LABEL: t4:
; SSE2-Mingw32: movw $120
; SSE2-Mingw32: movl $2021161080
; SSE2-Mingw32: movl $2021161080
; SSE2-Mingw32: movl $2021161080
; SSE2-Mingw32: movl $2021161080
; SSE2-Mingw32: movl $2021161080
; SSE2-Mingw32: movl $2021161080
; SSE2-Mingw32: movl $2021161080

; SSE1-LABEL: t4:
; SSE1: movw $120
; SSE1: movl $2021161080
; SSE1: movl $2021161080
; SSE1: movl $2021161080
; SSE1: movl $2021161080
; SSE1: movl $2021161080
; SSE1: movl $2021161080
; SSE1: movl $2021161080

; NOSSE-LABEL: t4:
; NOSSE: movw $120
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080

;;; TODO: (1) Some of the loads and stores are certainly unaligned and (2) the first load and first
;;; store overlap with the second load and second store respectively.
;;;
;;; Is either of the sequences ideal?

; x86-64 (core2) - four 8-byte immediate stores. The store at -10(%rsp)
; covers the last eight bytes of the buffer and overlaps the store at
; -16(%rsp).
; X86-64-LABEL: t4:
; X86-64: movabsq  $33909456017848440, %rax ## imm = 0x78787878787878
; X86-64: movq     %rax, -10(%rsp)
; X86-64: movabsq  $8680820740569200760, %rax ## imm = 0x7878787878787878
; X86-64: movq     %rax, -16(%rsp)
; X86-64: movq     %rax, -24(%rsp)
; X86-64: movq     %rax, -32(%rsp)

; x86-64 (nehalem) - two 16-byte vector copies; the one from offset 14
; overlaps the one from offset 0 by two bytes.
; NHM_64-LABEL: t4:
; NHM_64: movups   _.str2+14(%rip), %xmm0
; NHM_64: movups   %xmm0, -26(%rsp)
; NHM_64: movups   _.str2(%rip), %xmm0
; NHM_64: movaps   %xmm0, -40(%rsp)

  %tmp1 = alloca [30 x i8]
  %tmp2 = bitcast [30 x i8]* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
  unreachable
}

; Five-operand form of the memcpy intrinsic used by every test above.
; Operands, in order - destination, source, i32 length, i32 alignment,
; i1 volatile flag.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
