1; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
2target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
3target triple = "x86_64-apple-darwin10.0.0"
4
5define void @test1(i8* %Base, i64 %Size) nounwind ssp {
6bb.nph:                                           ; preds = %entry
7  br label %for.body
8
9for.body:                                         ; preds = %bb.nph, %for.body
10  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
11  %I.0.014 = getelementptr i8* %Base, i64 %indvar
12  store i8 0, i8* %I.0.014, align 1
13  %indvar.next = add i64 %indvar, 1
14  %exitcond = icmp eq i64 %indvar.next, %Size
15  br i1 %exitcond, label %for.end, label %for.body
16
17for.end:                                          ; preds = %for.body, %entry
18  ret void
19; CHECK: @test1
20; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
21; CHECK-NOT: store
22}
23
24; This is a loop that was rotated but where the blocks weren't merged.  This
25; shouldn't perturb us.
26define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
27bb.nph:                                           ; preds = %entry
28  br label %for.body
29
30for.body:                                         ; preds = %bb.nph, %for.body
31  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
32  %I.0.014 = getelementptr i8* %Base, i64 %indvar
33  store i8 0, i8* %I.0.014, align 1
34  %indvar.next = add i64 %indvar, 1
35  br label %for.body.cont
36for.body.cont:
37  %exitcond = icmp eq i64 %indvar.next, %Size
38  br i1 %exitcond, label %for.end, label %for.body
39
40for.end:                                          ; preds = %for.body, %entry
41  ret void
42; CHECK: @test1a
43; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
44; CHECK-NOT: store
45}
46
47
48define void @test2(i32* %Base, i64 %Size) nounwind ssp {
49entry:
50  %cmp10 = icmp eq i64 %Size, 0
51  br i1 %cmp10, label %for.end, label %for.body
52
53for.body:                                         ; preds = %entry, %for.body
54  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
55  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
56  store i32 16843009, i32* %add.ptr.i, align 4
57  %inc = add nsw i64 %i.011, 1
58  %exitcond = icmp eq i64 %inc, %Size
59  br i1 %exitcond, label %for.end, label %for.body
60
61for.end:                                          ; preds = %for.body, %entry
62  ret void
63; CHECK: @test2
64; CHECK: br i1 %cmp10,
65; CHECK: %0 = mul i64 %Size, 4
66; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
67; CHECK-NOT: store
68}
69
70; This is a case where there is an extra may-aliased store in the loop, we can't
71; promote the memset.
72define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp {
73entry:
74  br label %for.body
75
76for.body:                                         ; preds = %entry, %for.body
77  %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
78  %add.ptr.i = getelementptr i32* %Base, i64 %i.011
79  store i32 16843009, i32* %add.ptr.i, align 4
80
81  store i8 42, i8* %MayAlias
82  %inc = add nsw i64 %i.011, 1
83  %exitcond = icmp eq i64 %inc, %Size
84  br i1 %exitcond, label %for.end, label %for.body
85
86for.end:                                          ; preds = %entry
87  ret void
88; CHECK: @test3
89; CHECK-NOT: memset
90; CHECK: ret void
91}
92
93
94;; TODO: We should be able to promote this memset.  Not yet though.
95define void @test4(i8* %Base) nounwind ssp {
96bb.nph:                                           ; preds = %entry
97  %Base100 = getelementptr i8* %Base, i64 1000
98  br label %for.body
99
100for.body:                                         ; preds = %bb.nph, %for.body
101  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
102  %I.0.014 = getelementptr i8* %Base, i64 %indvar
103  store i8 0, i8* %I.0.014, align 1
104
105  ;; Store beyond the range memset, should be safe to promote.
106  store i8 42, i8* %Base100
107
108  %indvar.next = add i64 %indvar, 1
109  %exitcond = icmp eq i64 %indvar.next, 100
110  br i1 %exitcond, label %for.end, label %for.body
111
112for.end:                                          ; preds = %for.body, %entry
113  ret void
114; CHECK-TODO: @test4
115; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
116; CHECK-TODO-NOT: store
117}
118
119; This can't be promoted: the memset is a store of a loop variant value.
120define void @test5(i8* %Base, i64 %Size) nounwind ssp {
121bb.nph:                                           ; preds = %entry
122  br label %for.body
123
124for.body:                                         ; preds = %bb.nph, %for.body
125  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
126  %I.0.014 = getelementptr i8* %Base, i64 %indvar
127
128  %V = trunc i64 %indvar to i8
129  store i8 %V, i8* %I.0.014, align 1
130  %indvar.next = add i64 %indvar, 1
131  %exitcond = icmp eq i64 %indvar.next, %Size
132  br i1 %exitcond, label %for.end, label %for.body
133
134for.end:                                          ; preds = %for.body, %entry
135  ret void
136; CHECK: @test5
137; CHECK-NOT: memset
138; CHECK: ret void
139}
140
141
142;; memcpy formation
143define void @test6(i64 %Size) nounwind ssp {
144bb.nph:
145  %Base = alloca i8, i32 10000
146  %Dest = alloca i8, i32 10000
147  br label %for.body
148
149for.body:                                         ; preds = %bb.nph, %for.body
150  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
151  %I.0.014 = getelementptr i8* %Base, i64 %indvar
152  %DestI = getelementptr i8* %Dest, i64 %indvar
153  %V = load i8* %I.0.014, align 1
154  store i8 %V, i8* %DestI, align 1
155  %indvar.next = add i64 %indvar, 1
156  %exitcond = icmp eq i64 %indvar.next, %Size
157  br i1 %exitcond, label %for.end, label %for.body
158
159for.end:                                          ; preds = %for.body, %entry
160  ret void
161; CHECK: @test6
162; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
163; CHECK-NOT: store
164; CHECK: ret void
165}
166
167
168; This is a loop that was rotated but where the blocks weren't merged.  This
169; shouldn't perturb us.
170define void @test7(i8* %Base, i64 %Size) nounwind ssp {
171bb.nph:                                           ; preds = %entry
172  br label %for.body
173
174for.body:                                         ; preds = %bb.nph, %for.body
175  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ]
176  br label %for.body.cont
177for.body.cont:
178  %I.0.014 = getelementptr i8* %Base, i64 %indvar
179  store i8 0, i8* %I.0.014, align 1
180  %indvar.next = add i64 %indvar, 1
181  %exitcond = icmp eq i64 %indvar.next, %Size
182  br i1 %exitcond, label %for.end, label %for.body
183
184for.end:                                          ; preds = %for.body, %entry
185  ret void
186; CHECK: @test7
187; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
188; CHECK-NOT: store
189}
190
191; This is a loop should not be transformed, it only executes one iteration.
192define void @test8(i64* %Ptr, i64 %Size) nounwind ssp {
193bb.nph:                                           ; preds = %entry
194  br label %for.body
195
196for.body:                                         ; preds = %bb.nph, %for.body
197  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
198  %PI = getelementptr i64* %Ptr, i64 %indvar
199  store i64 0, i64 *%PI
200  %indvar.next = add i64 %indvar, 1
201  %exitcond = icmp eq i64 %indvar.next, 1
202  br i1 %exitcond, label %for.end, label %for.body
203
204for.end:                                          ; preds = %for.body, %entry
205  ret void
206; CHECK: @test8
207; CHECK: store i64 0, i64* %PI
208}
209
210declare i8* @external(i8*)
211
212;; This cannot be transformed into a memcpy, because the read-from location is
213;; mutated by the loop.
214define void @test9(i64 %Size) nounwind ssp {
215bb.nph:
216  %Base = alloca i8, i32 10000
217  %Dest = alloca i8, i32 10000
218
219  %BaseAlias = call i8* @external(i8* %Base)
220  br label %for.body
221
222for.body:                                         ; preds = %bb.nph, %for.body
223  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
224  %I.0.014 = getelementptr i8* %Base, i64 %indvar
225  %DestI = getelementptr i8* %Dest, i64 %indvar
226  %V = load i8* %I.0.014, align 1
227  store i8 %V, i8* %DestI, align 1
228
229  ;; This store can clobber the input.
230  store i8 4, i8* %BaseAlias
231
232  %indvar.next = add i64 %indvar, 1
233  %exitcond = icmp eq i64 %indvar.next, %Size
234  br i1 %exitcond, label %for.end, label %for.body
235
236for.end:                                          ; preds = %for.body, %entry
237  ret void
238; CHECK: @test9
239; CHECK-NOT: llvm.memcpy
240; CHECK: ret void
241}
242
243; Two dimensional nested loop should be promoted to one big memset.
244define void @test10(i8* %X) nounwind ssp {
245entry:
246  br label %bb.nph
247
248bb.nph:                                           ; preds = %entry, %for.inc10
249  %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
250  br label %for.body5
251
252for.body5:                                        ; preds = %for.body5, %bb.nph
253  %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ]
254  %mul = mul nsw i32 %i.04, 100
255  %add = add nsw i32 %j.02, %mul
256  %idxprom = sext i32 %add to i64
257  %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom
258  store i8 0, i8* %arrayidx, align 1
259  %inc = add nsw i32 %j.02, 1
260  %cmp4 = icmp eq i32 %inc, 100
261  br i1 %cmp4, label %for.inc10, label %for.body5
262
263for.inc10:                                        ; preds = %for.body5
264  %inc12 = add nsw i32 %i.04, 1
265  %cmp = icmp eq i32 %inc12, 100
266  br i1 %cmp, label %for.end13, label %bb.nph
267
268for.end13:                                        ; preds = %for.inc10
269  ret void
270; CHECK: @test10
271; CHECK: entry:
272; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
273; CHECK-NOT: store
274; CHECK: ret void
275}
276
277; On darwin10 (which is the triple in this .ll file) this loop can be turned
278; into a memset_pattern call.
279; rdar://9009151
280define void @test11_pattern(i32* nocapture %P) nounwind ssp {
281entry:
282  br label %for.body
283
284for.body:                                         ; preds = %entry, %for.body
285  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
286  %arrayidx = getelementptr i32* %P, i64 %indvar
287  store i32 1, i32* %arrayidx, align 4
288  %indvar.next = add i64 %indvar, 1
289  %exitcond = icmp eq i64 %indvar.next, 10000
290  br i1 %exitcond, label %for.end, label %for.body
291
292for.end:                                          ; preds = %for.body
293  ret void
294; CHECK: @test11_pattern
295; CHECK-NEXT: entry:
296; CHECK-NEXT: bitcast
297; CHECK-NEXT: memset_pattern
298; CHECK-NOT: store
299; CHECK: ret void
300}
301
302; Store of null should turn into memset of zero.
303define void @test12(i32** nocapture %P) nounwind ssp {
304entry:
305  br label %for.body
306
307for.body:                                         ; preds = %entry, %for.body
308  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
309  %arrayidx = getelementptr i32** %P, i64 %indvar
310  store i32* null, i32** %arrayidx, align 4
311  %indvar.next = add i64 %indvar, 1
312  %exitcond = icmp eq i64 %indvar.next, 10000
313  br i1 %exitcond, label %for.end, label %for.body
314
315for.end:                                          ; preds = %for.body
316  ret void
317; CHECK: @test12
318; CHECK-NEXT: entry:
319; CHECK-NEXT: bitcast
320; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
321; CHECK-NOT: store
322; CHECK: ret void
323}
324
325@G = global i32 5
326
327; This store-of-address loop can be turned into a memset_pattern call.
328; rdar://9009151
329define void @test13_pattern(i32** nocapture %P) nounwind ssp {
330entry:
331  br label %for.body
332
333for.body:                                         ; preds = %entry, %for.body
334  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
335  %arrayidx = getelementptr i32** %P, i64 %indvar
336  store i32* @G, i32** %arrayidx, align 4
337  %indvar.next = add i64 %indvar, 1
338  %exitcond = icmp eq i64 %indvar.next, 10000
339  br i1 %exitcond, label %for.end, label %for.body
340
341for.end:                                          ; preds = %for.body
342  ret void
343; CHECK: @test13_pattern
344; CHECK-NEXT: entry:
345; CHECK-NEXT: bitcast
346; CHECK-NEXT: memset_pattern
347; CHECK-NOT: store
348; CHECK: ret void
349}
350
351
352
353; PR9815 - This is a partial overlap case that cannot be safely transformed
354; into a memcpy.
355@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
356
357define i32 @test14() nounwind {
358entry:
359  br label %for.body
360
361for.body:                                         ; preds = %for.inc, %for.body.lr.ph
362  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
363  %add = add nsw i32 %tmp5, 4
364  %idxprom = sext i32 %add to i64
365  %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom
366  %tmp2 = load i32* %arrayidx, align 4
367  %add4 = add nsw i32 %tmp5, 5
368  %idxprom5 = sext i32 %add4 to i64
369  %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5
370  store i32 %tmp2, i32* %arrayidx6, align 4
371  %inc = add nsw i32 %tmp5, 1
372  %cmp = icmp slt i32 %inc, 2
373  br i1 %cmp, label %for.body, label %for.end
374
375for.end:                                          ; preds = %for.inc
376  %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
377  ret i32 %tmp8
378; CHECK: @test14
379; CHECK: for.body:
380; CHECK: load i32
381; CHECK: store i32
382; CHECK: br i1 %cmp
383
384}
385
386
387