1; RUN: opt < %s -memcpyopt -S | FileCheck %s
2
3; All the stores in this example should be merged into a single memset.
4
; Target description used by every test in this file: the datalayout string
; starts with "e" (little-endian) and "p:32:32:32" (32-bit pointers), and the
; triple names 32-bit x86 Darwin.
5target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
6target triple = "i386-apple-darwin8"
7
; test1: writes the byte %c to each of the 19 elements (indices 0..18, in
; ascending order) of a stack array and then passes the array to @bar.
; The FileCheck directives at the end of the body require that the optimized
; function contains a call to the memset intrinsic and no remaining stores.
8define void @test1(i8 signext  %c) nounwind  {
9entry:
10	%x = alloca [19 x i8]		; <[19 x i8]*> [#uses=20]
11	%tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0		; <i8*> [#uses=1]
12	store i8 %c, i8* %tmp, align 1
13	%tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1		; <i8*> [#uses=1]
14	store i8 %c, i8* %tmp5, align 1
15	%tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2		; <i8*> [#uses=1]
16	store i8 %c, i8* %tmp9, align 1
17	%tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3		; <i8*> [#uses=1]
18	store i8 %c, i8* %tmp13, align 1
19	%tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4		; <i8*> [#uses=1]
20	store i8 %c, i8* %tmp17, align 1
21	%tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5		; <i8*> [#uses=1]
22	store i8 %c, i8* %tmp21, align 1
23	%tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6		; <i8*> [#uses=1]
24	store i8 %c, i8* %tmp25, align 1
25	%tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7		; <i8*> [#uses=1]
26	store i8 %c, i8* %tmp29, align 1
27	%tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8		; <i8*> [#uses=1]
28	store i8 %c, i8* %tmp33, align 1
29	%tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9		; <i8*> [#uses=1]
30	store i8 %c, i8* %tmp37, align 1
31	%tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10		; <i8*> [#uses=1]
32	store i8 %c, i8* %tmp41, align 1
33	%tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11		; <i8*> [#uses=1]
34	store i8 %c, i8* %tmp45, align 1
35	%tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12		; <i8*> [#uses=1]
36	store i8 %c, i8* %tmp49, align 1
37	%tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13		; <i8*> [#uses=1]
38	store i8 %c, i8* %tmp53, align 1
39	%tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14		; <i8*> [#uses=1]
40	store i8 %c, i8* %tmp57, align 1
41	%tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15		; <i8*> [#uses=1]
42	store i8 %c, i8* %tmp61, align 1
43	%tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16		; <i8*> [#uses=1]
44	store i8 %c, i8* %tmp65, align 1
45	%tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17		; <i8*> [#uses=1]
46	store i8 %c, i8* %tmp69, align 1
47	%tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18		; <i8*> [#uses=1]
48	store i8 %c, i8* %tmp73, align 1
; Hand the filled array to an external function.
; NOTE(review): presumably this keeps the array observable so the writes
; cannot simply be removed as dead - confirm.
49	%tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind
50	ret void
; Expected output: no store before or after the memset call, up to the ret.
51; CHECK-LABEL: @test1(
52; CHECK-NOT: store
53; CHECK: call void @llvm.memset.p0i8.i64
54; CHECK-NOT: store
55; CHECK: ret
56}
57
; External variadic function; @test1 passes its stack array to it.
58declare i32 @bar(...)
59
; Pair of i16 fields; @test2 zero-fills two 8-element arrays of this type.
60%struct.MV = type { i16, i16 }
61
62
; test2: fills three stack arrays field by field, each from the highest index
; down to index 0:
;   - %ref_idx  (8 x i8):        every byte set to -1
;   - %up_mvd   (8 x struct.MV): every i16 field set to 0
;   - %left_mvd (8 x struct.MV): every i16 field set to 0
; and then passes all three to @foo. The FileCheck directives at the end
; require exactly three memset calls (8 bytes of -1, then two 32-byte zero
; fills) with no stores left anywhere in between.
63define void @test2() nounwind  {
64entry:
65	%ref_idx = alloca [8 x i8]		; <[8 x i8]*> [#uses=8]
66	%left_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
67	%up_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
; --- %ref_idx: bytes 7 down to 0 set to -1 ---
68	%tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7		; <i8*> [#uses=1]
69	store i8 -1, i8* %tmp20, align 1
70	%tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6		; <i8*> [#uses=1]
71	store i8 -1, i8* %tmp23, align 1
72	%tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5		; <i8*> [#uses=1]
73	store i8 -1, i8* %tmp26, align 1
74	%tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4		; <i8*> [#uses=1]
75	store i8 -1, i8* %tmp29, align 1
76	%tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3		; <i8*> [#uses=1]
77	store i8 -1, i8* %tmp32, align 1
78	%tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2		; <i8*> [#uses=1]
79	store i8 -1, i8* %tmp35, align 1
80	%tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1		; <i8*> [#uses=1]
81	store i8 -1, i8* %tmp38, align 1
; %tmp41 (address of byte 0) is also the pointer passed to @foo and the
; destination named in the first expected memset below.
82	%tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0		; <i8*> [#uses=2]
83	store i8 -1, i8* %tmp41, align 1
; --- %up_mvd: both fields of elements 7 down to 0 set to 0 ---
84	%tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
85	store i16 0, i16* %tmp43, align 2
86	%tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
87	store i16 0, i16* %tmp46, align 2
88	%tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
89	store i16 0, i16* %tmp57, align 2
90	%tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
91	store i16 0, i16* %tmp60, align 2
92	%tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
93	store i16 0, i16* %tmp71, align 2
94	%tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
95	store i16 0, i16* %tmp74, align 2
96	%tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
97	store i16 0, i16* %tmp85, align 2
98	%tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
99	store i16 0, i16* %tmp88, align 2
100	%tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
101	store i16 0, i16* %tmp99, align 2
102	%tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
103	store i16 0, i16* %tmp102, align 2
104	%tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
105	store i16 0, i16* %tmp113, align 2
106	%tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
107	store i16 0, i16* %tmp116, align 2
108	%tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
109	store i16 0, i16* %tmp127, align 2
110	%tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
111	store i16 0, i16* %tmp130, align 2
; The store to the very first field carries align 8; the expected memset for
; this array is also checked with alignment 8.
112	%tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
113	store i16 0, i16* %tmp141, align 8
114	%tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
115	store i16 0, i16* %tmp144, align 2
; --- %left_mvd: both fields of elements 7 down to 0 set to 0 ---
116	%tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
117	store i16 0, i16* %tmp148, align 2
118	%tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
119	store i16 0, i16* %tmp151, align 2
120	%tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
121	store i16 0, i16* %tmp162, align 2
122	%tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
123	store i16 0, i16* %tmp165, align 2
124	%tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
125	store i16 0, i16* %tmp176, align 2
126	%tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
127	store i16 0, i16* %tmp179, align 2
128	%tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
129	store i16 0, i16* %tmp190, align 2
130	%tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
131	store i16 0, i16* %tmp193, align 2
132	%tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
133	store i16 0, i16* %tmp204, align 2
134	%tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
135	store i16 0, i16* %tmp207, align 2
136	%tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
137	store i16 0, i16* %tmp218, align 2
138	%tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
139	store i16 0, i16* %tmp221, align 2
140	%tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
141	store i16 0, i16* %tmp232, align 2
142	%tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
143	store i16 0, i16* %tmp235, align 2
; As with %up_mvd, the first field's store is the only one marked align 8.
144	%tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
145	store i16 0, i16* %tmp246, align 8
146	%tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
147	store i16 0, i16* %tmp249, align 2
; Pass pointers to the first element of each array to the external @foo.
148	%up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
149	%left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
150	call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind
151	ret void
152
; Expected output: three memsets in this order (8 bytes of -1 at %tmp41, then
; two 32-byte zero fills through the unnamed pointers %0 and %1), no stores.
153; CHECK-LABEL: @test2(
154; CHECK-NOT: store
155; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
156; CHECK-NOT: store
157; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false)
158; CHECK-NOT: store
159; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false)
160; CHECK-NOT: store
161; CHECK: ret
162}
163
; External function; @test2 passes pointers to its three filled arrays to it.
164declare void @foo(%struct.MV*, %struct.MV*, i8*)
165
166
167; Store followed by memset.
; test3: a 4-byte-aligned i32 zero store to element 1 of %P, followed by an
; 11-byte zero memset that starts at element 2 (directly after the stored
; word, given the 32-bit i32 of this file's datalayout).
168define void @test3(i32* nocapture %P) nounwind ssp {
169entry:
170  %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
171  store i32 0, i32* %arrayidx, align 4
172  %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
173  %0 = bitcast i32* %add.ptr to i8*
174  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
175  ret void
; Expected output: the store is gone and a single memset covers 4 + 11 = 15
; bytes with the store's alignment of 4.
176; CHECK-LABEL: @test3(
177; CHECK-NOT: store
178; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
179}
180
181; Store followed by memset, different offset scenario.
; test4: same shape as a store followed by an adjacent memset, but here the
; i32 zero store targets %P itself (element 0) and the 11-byte zero memset
; starts at element 1.
182define void @test4(i32* nocapture %P) nounwind ssp {
183entry:
184  store i32 0, i32* %P, align 4
185  %add.ptr = getelementptr inbounds i32, i32* %P, i64 1
186  %0 = bitcast i32* %add.ptr to i8*
187  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
188  ret void
; Expected output: no store, and one 15-byte memset with alignment 4.
189; CHECK-LABEL: @test4(
190; CHECK-NOT: store
191; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
192}
193
; Declaration of the memset intrinsic in the five-operand form used throughout
; this file; the calls pass (destination, fill byte, length, alignment,
; volatile flag).
194declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
195
196; Memset followed by store.
; test5: an 11-byte zero memset starting at element 2 of %P comes first, and
; the i32 zero store to element 1 (the word just before it) comes second.
197define void @test5(i32* nocapture %P) nounwind ssp {
198entry:
199  %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
200  %0 = bitcast i32* %add.ptr to i8*
201  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
202  %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
203  store i32 0, i32* %arrayidx, align 4
204  ret void
; Expected output: no store, and one 15-byte memset with alignment 4.
205; CHECK-LABEL: @test5(
206; CHECK-NOT: store
207; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
208}
209
210;; Memset followed by memset.
; test6: two 12-byte zero memsets, the first at %P and the second at element 3
; of %P (byte offset 12 with 4-byte i32), i.e. back to back.
211define void @test6(i32* nocapture %P) nounwind ssp {
212entry:
213  %0 = bitcast i32* %P to i8*
214  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
215  %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
216  %1 = bitcast i32* %add.ptr to i8*
217  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
218  ret void
; Expected output: one memset of 12 + 12 = 24 bytes, alignment 1.
219; CHECK-LABEL: @test6(
220; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
221}
222
223; More aggressive heuristic
224; rdar://9892684
; test7: five 4-byte-aligned i32 stores of -1 to elements 0..4 of %c, in a
; function marked optsize.
; The body has no explicit entry label, so the entry block implicitly takes
; the number %0 and the unnamed values start at %1; that is why the expected
; memset below refers to %5.
225define void @test7(i32* nocapture %c) nounwind optsize {
226  store i32 -1, i32* %c, align 4
227  %1 = getelementptr inbounds i32, i32* %c, i32 1
228  store i32 -1, i32* %1, align 4
229  %2 = getelementptr inbounds i32, i32* %c, i32 2
230  store i32 -1, i32* %2, align 4
231  %3 = getelementptr inbounds i32, i32* %c, i32 3
232  store i32 -1, i32* %3, align 4
233  %4 = getelementptr inbounds i32, i32* %c, i32 4
234  store i32 -1, i32* %4, align 4
; Expected output: one memset of 5 * 4 = 20 bytes of -1 with alignment 4.
235; CHECK-LABEL: @test7(
236; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
237  ret void
238}
239
; Struct wrapping a 4 x i32 array; @test8 allocates one on the stack.
240%struct.test8 = type { [4 x i32] }
241
; test8: a single 16-byte-aligned <4 x i32> store of all -1 lanes into a
; stack slot. The FileCheck directive matches the very same store, i.e. the
; vector store is expected to survive the pass unchanged.
242define void @test8() {
243entry:
244  %memtmp = alloca %struct.test8, align 16
245  %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
246  store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
247  ret void
248; CHECK-LABEL: @test8(
249; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
250}
251
; 16-byte-aligned, zero-initialized global of 16 i64 elements; @test9 writes
; its first 16 bytes.
252@test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16
253
; test9: sixteen i8 stores of -1 to bytes 0..15 of the global @test9buf, all
; addressed through constant expressions rather than instructions. The
; per-store alignments vary (16, 1, 2, 1, 4, ...) with the byte offset.
254define void @test9() nounwind {
255  store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16
256  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1
257  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2
258  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 3), align 1
259  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 4), align 4
260  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 5), align 1
261  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 6), align 2
262  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 7), align 1
; Byte 8 is addressed differently from its neighbours: as the start of i64
; element 1 of the array, bitcast to i8*.
263  store i8 -1, i8* bitcast (i64* getelementptr inbounds ([16 x i64], [16 x i64]* @test9buf, i64 0, i64 1) to i8*), align 8
264  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 9), align 1
265  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 10), align 2
266  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 11), align 1
267  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 12), align 4
268  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 13), align 1
269  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2
270  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
271  ret void
; Expected output: one 16-byte memset of -1 at the start of the global, with
; alignment 16.
272; CHECK-LABEL: @test9(
273; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
274}
275
276; PR19092
; test10: two zero memsets to the same pointer %P, 42 bytes and then 23 bytes,
; so the second range lies entirely inside the first.
277define void @test10(i8* nocapture %P) nounwind {
278  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
279  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false)
280  ret void
; Expected output: exactly one memset (the 42-byte one) before the ret, with
; no other memset before or after it.
281; CHECK-LABEL: @test10(
282; CHECK-NOT: memset
283; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
284; CHECK-NOT: memset
285; CHECK: ret void
286}
287
288; Memset followed by odd store.
; test11: an 11-byte memset of the byte value 1 starting at element 3 of %P
; (byte offset 12 with 4-byte i32), followed by a 4-byte-aligned i96 store to
; element 0, i.e. to the 12 bytes directly in front of the memset region.
289define void @test11(i32* nocapture %P) nounwind ssp {
290entry:
291  %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
292  %0 = bitcast i32* %add.ptr to i8*
293  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i32 1, i1 false)
294  %arrayidx = getelementptr inbounds i32, i32* %P, i64 0
295  %arrayidx.cast = bitcast i32* %arrayidx to i96*
; 310698676526526814092329217 == 0x010101010101010101010101, i.e. twelve bytes
; that are each 0x01 - the same fill byte as the memset above.
296  store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4
297  ret void
; Expected output: no store, and one memset of 12 + 11 = 23 bytes of value 1
; with alignment 4.
298; CHECK-LABEL: @test11(
299; CHECK-NOT: store
300; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 1, i64 23, i32 4, i1 false)
301}
302