1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
3
; Data layout shared by every function in this file: little-endian ("e") with
; 32-bit pointers ("p:32:32").
4target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
; memset intrinsic declared in its five-operand form, where the alignment is an
; explicit i32 operand. The expected output of @test21 shows the call with four
; operands and an `align` parameter attribute instead.
; NOTE(review): presumably the IR reader auto-upgrades the old form -- confirm.
5declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
6
; test13: a single store to the noalias argument %P sits in block %for, whose
; back edge is guarded by a constant-false condition. No other instruction in
; the function writes %P, and the expected output keeps the store unchanged.
7define void @test13(i32* noalias %P) {
8; CHECK-LABEL: @test13(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    br label [[FOR:%.*]]
11; CHECK:       for:
12; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
13; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
14; CHECK:       end:
15; CHECK-NEXT:    ret void
16;
17entry:
18  br label %for
19for:
20  store i32 0, i32* %P
21  br i1 false, label %for, label %end
22end:
23  ret void
24}
25
26
; test14: %entry stores 1 to %P and the loop block %for then stores 0 to the
; same i32 location, with no read of %P in between. The expected output has no
; store left in %entry (the block is just the branch), while the store in %for
; is retained.
27define void @test14(i32* noalias %P) {
28; CHECK-LABEL: @test14(
29; CHECK-NEXT:  entry:
30; CHECK-NEXT:    br label [[FOR:%.*]]
31; CHECK:       for:
32; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
33; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
34; CHECK:       end:
35; CHECK-NEXT:    ret void
36;
37entry:
38  store i32 1, i32* %P
39  br label %for
40for:
41  store i32 0, i32* %P
42  br i1 false, label %for, label %end
43end:
44  ret void
45}
46
; test18: %entry stores an i32 0 to %P. Inside the loop, two i8 stores through
; %P2 (a bitcast of %P) write the first byte, with an i32 load of %P between
; them. The load reads the byte written by the first i8 store and the three
; bytes of the i32 store that the i8 stores never cover. The expected output
; keeps all three stores and the load.
47define void @test18(i32* noalias %P) {
48; CHECK-LABEL: @test18(
49; CHECK-NEXT:  entry:
50; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
51; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
52; CHECK-NEXT:    br label [[FOR:%.*]]
53; CHECK:       for:
54; CHECK-NEXT:    store i8 1, i8* [[P2]], align 1
55; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]], align 4
56; CHECK-NEXT:    store i8 2, i8* [[P2]], align 1
57; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
58; CHECK:       end:
59; CHECK-NEXT:    ret void
60;
61entry:
62  %P2 = bitcast i32* %P to i8*
63  store i32 0, i32* %P
64  br label %for
65for:
66  store i8 1, i8* %P2
67  %x = load i32, i32* %P
68  store i8 2, i8* %P2
69  br i1 false, label %for, label %end
70end:
71  ret void
72}
73
; test21: %entry zero-fills 28 bytes starting at &P[1] with memset, and the
; loop block then stores an i32 1 to &P[1], i.e. over the first 4 bytes of the
; memset region. The expected output shows the memset shortened from the front:
; its destination is advanced by 4 bytes (a new i8 GEP off %p3) and its length
; drops from 28 to 24. The store in the loop is kept.
; The input call uses the five-operand memset form (alignment as `i32 4`); the
; expected output shows the four-operand form with `align 4` on the pointer.
74define void @test21(i32* noalias %P) {
75; CHECK-LABEL: @test21(
76; CHECK-NEXT:  entry:
77; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
78; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
79; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
80; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
81; CHECK-NEXT:    br label [[FOR:%.*]]
82; CHECK:       for:
83; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
84; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
85; CHECK-NEXT:    br i1 false, label [[FOR]], label [[END:%.*]]
86; CHECK:       end:
87; CHECK-NEXT:    ret void
88;
89entry:
90  %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
91  %p3 = bitcast i32* %arrayidx0 to i8*
92  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
93  br label %for
94for:
95  %arrayidx1 = getelementptr inbounds i32, i32* %P, i64 1
96  store i32 1, i32* %arrayidx1, align 4
97  br i1 false, label %for, label %end
98end:
99  ret void
100}
101
; test_loop: a two-level loop nest over i (outer) and j (inner), both running
; from 0 up to %N. For each i, the outer block stores 0 to b[i], the inner loop
; accumulates A[i*N + j] * x[j] in the phi %0, and %for.cond.cleanup3 stores
; the accumulated sum to the same b[i] address (%arrayidx). %A and %x are
; noalias readonly; %b is noalias. The inner loop only loads from %A and %x.
; The expected output mirrors the input instruction for instruction, so both
; the initial `store i32 0` and the final store of %add9 are retained.
102define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %x, i32* noalias nocapture %b) local_unnamed_addr {
103; CHECK-LABEL: @test_loop(
104; CHECK-NEXT:  entry:
105; CHECK-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
106; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_BODY4_LR_PH_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
107; CHECK:       for.body4.lr.ph.preheader:
108; CHECK-NEXT:    br label [[FOR_BODY4_LR_PH:%.*]]
109; CHECK:       for.cond.cleanup:
110; CHECK-NEXT:    ret void
111; CHECK:       for.body4.lr.ph:
112; CHECK-NEXT:    [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
113; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
114; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX]], align 4
115; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
116; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
117; CHECK:       for.body4:
118; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[ADD9:%.*]], [[FOR_BODY4]] ]
119; CHECK-NEXT:    [[J_026:%.*]] = phi i32 [ 0, [[FOR_BODY4_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY4]] ]
120; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[J_026]], [[MUL]]
121; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[ADD]]
122; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
123; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[J_026]]
124; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
125; CHECK-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
126; CHECK-NEXT:    [[ADD9]] = add nsw i32 [[MUL7]], [[TMP0]]
127; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[J_026]], 1
128; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
129; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]]
130; CHECK:       for.cond.cleanup3:
131; CHECK-NEXT:    store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4
132; CHECK-NEXT:    [[INC11]] = add nuw nsw i32 [[I_028]], 1
133; CHECK-NEXT:    [[EXITCOND29:%.*]] = icmp eq i32 [[INC11]], [[N]]
134; CHECK-NEXT:    br i1 [[EXITCOND29]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_LR_PH]]
135;
136entry:
137  %cmp27 = icmp sgt i32 %N, 0
138  br i1 %cmp27, label %for.body4.lr.ph.preheader, label %for.cond.cleanup
139
140for.body4.lr.ph.preheader:                        ; preds = %entry
141  br label %for.body4.lr.ph
142
143for.cond.cleanup:                                 ; preds = %for.cond.cleanup3, %entry
144  ret void
145
146for.body4.lr.ph:                                  ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
147  %i.028 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
148  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.028
149  store i32 0, i32* %arrayidx, align 4
150  %mul = mul nsw i32 %i.028, %N
151  br label %for.body4
152
153for.body4:                                        ; preds = %for.body4, %for.body4.lr.ph
154  %0 = phi i32 [ 0, %for.body4.lr.ph ], [ %add9, %for.body4 ]
155  %j.026 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
156  %add = add nsw i32 %j.026, %mul
157  %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add
158  %1 = load i32, i32* %arrayidx5, align 4
159  %arrayidx6 = getelementptr inbounds i32, i32* %x, i32 %j.026
160  %2 = load i32, i32* %arrayidx6, align 4
161  %mul7 = mul nsw i32 %2, %1
162  %add9 = add nsw i32 %mul7, %0
163  %inc = add nuw nsw i32 %j.026, 1
164  %exitcond = icmp eq i32 %inc, %N
165  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
166
167for.cond.cleanup3:                                ; preds = %for.body4
168  store i32 %add9, i32* %arrayidx, align 4
169  %inc11 = add nuw nsw i32 %i.028, 1
170  %exitcond29 = icmp eq i32 %inc11, %N
171  br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph
172}
173
; External predicate with no definition in this file, declared `readnone`. The
; loop_multiple_def_uses* tests call it to get a loop condition that is not a
; compile-time constant.
174declare i1 @cond() readnone
175
; The store in for.header is followed on both successor paths by a full
; overwrite of the same i32: the store in for.body (placed before the load) and
; the store in end. The expected output below still contains the for.header
; store, which is the missed optimization the TODO refers to.
176; TODO: We can eliminate the store in for.header, but we currently hit a MemoryPhi.
177define void @loop_multiple_def_uses(i32* noalias %P) {
178; CHECK-LABEL: @loop_multiple_def_uses(
179; CHECK-NEXT:  entry:
180; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
181; CHECK:       for.header:
182; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
183; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
184; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
185; CHECK:       for.body:
186; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
187; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]], align 4
188; CHECK-NEXT:    br label [[FOR_HEADER]]
189; CHECK:       end:
190; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
191; CHECK-NEXT:    ret void
192;
193entry:
194  br label %for.header
195
196for.header:
197  store i32 1, i32* %P, align 4
198  %c1 = call i1 @cond()
199  br i1 %c1, label %for.body, label %end
200
201for.body:
202  store i32 1, i32* %P, align 4
203  %lv = load i32, i32* %P
204  br label %for.header
205
206end:
207  store i32 3, i32* %P, align 4
208  ret void
209}
210
211; We cannot eliminate the store in for.header, as it is only partially
212; overwritten in for.body and read afterwards.
; Concretely: for.body writes a single i8 through a bitcast of %p and then
; loads the full i32 from %p, so three bytes of the for.header store are still
; observed by that load. The expected output keeps every store.
213define void @loop_multiple_def_uses_partial_write(i32* noalias %p) {
214; CHECK-LABEL: @loop_multiple_def_uses_partial_write(
215; CHECK-NEXT:  entry:
216; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
217; CHECK:       for.header:
218; CHECK-NEXT:    store i32 1239491, i32* [[P:%.*]], align 4
219; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
220; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
221; CHECK:       for.body:
222; CHECK-NEXT:    [[C:%.*]] = bitcast i32* [[P]] to i8*
223; CHECK-NEXT:    store i8 1, i8* [[C]], align 4
224; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]], align 4
225; CHECK-NEXT:    br label [[FOR_HEADER]]
226; CHECK:       end:
227; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
228; CHECK-NEXT:    ret void
229;
230entry:
231  br label %for.header
232
233for.header:
234  store i32 1239491, i32* %p, align 4
235  %c1 = call i1 @cond()
236  br i1 %c1, label %for.body, label %end
237
238for.body:
239  %c = bitcast i32* %p to i8*
240  store i8 1, i8* %c, align 4
241  %lv = load i32, i32* %p
242  br label %for.header
243
244end:
245  store i32 3, i32* %p, align 4
246  ret void
247}
248
249; We cannot eliminate the store in for.header, as the location is not overwritten
250; in for.body and read afterwards.
; Here the store in for.body goes to %q, a second pointer argument. Neither %p
; nor %q is marked noalias, so that store is not a guaranteed overwrite of %p,
; and the following load reads %p. The expected output keeps every store.
251define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) {
252; CHECK-LABEL: @loop_multiple_def_uses_mayalias_write(
253; CHECK-NEXT:  entry:
254; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
255; CHECK:       for.header:
256; CHECK-NEXT:    store i32 1239491, i32* [[P:%.*]], align 4
257; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
258; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
259; CHECK:       for.body:
260; CHECK-NEXT:    store i32 1, i32* [[Q:%.*]], align 4
261; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]], align 4
262; CHECK-NEXT:    br label [[FOR_HEADER]]
263; CHECK:       end:
264; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
265; CHECK-NEXT:    ret void
266;
267entry:
268  br label %for.header
269
270for.header:
271  store i32 1239491, i32* %p, align 4
272  %c1 = call i1 @cond()
273  br i1 %c1, label %for.body, label %end
274
275for.body:
276  store i32 1, i32* %q, align 4
277  %lv = load i32, i32* %p
278  br label %for.header
279
280end:
281  store i32 3, i32* %p, align 4
282  ret void
283}
284
; Two-field struct of i32s; @widget addresses its second field (index 1).
285%struct.hoge = type { i32, i32 }
286
; Externally defined pointer to a %struct.hoge, loaded twice per iteration by
; the loop in @widget.
287@global = external local_unnamed_addr global %struct.hoge*, align 8
288
; widget: %bb copies 64 bytes from an undef source into %tmp with memcpy, then
; enters %bb1, which branches unconditionally back to itself (the function has
; no return). Each iteration loads @global twice and stores 0, then 10, to
; field 1 of the struct at an undef element index, once through each loaded
; pointer. The expected output is identical to the input: the memcpy and both
; stores are retained.
; NOTE(review): looks like a reduced reproducer for a specific bug -- confirm
; against the commit that added it.
289define void @widget(i8* %tmp) {
290; CHECK-LABEL: @widget(
291; CHECK-NEXT:  bb:
292; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP:%.*]], i8* nonnull align 16 undef, i64 64, i1 false)
293; CHECK-NEXT:    br label [[BB1:%.*]]
294; CHECK:       bb1:
295; CHECK-NEXT:    [[TMP2:%.*]] = load %struct.hoge*, %struct.hoge** @global, align 8
296; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[TMP2]], i64 undef, i32 1
297; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 4
298; CHECK-NEXT:    [[TMP4:%.*]] = load %struct.hoge*, %struct.hoge** @global, align 8
299; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[TMP4]], i64 undef, i32 1
300; CHECK-NEXT:    store i32 10, i32* [[TMP5]], align 4
301; CHECK-NEXT:    br label [[BB1]]
302;
303bb:
304  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %tmp, i8* nonnull align 16 undef, i64 64, i1 false)
305  br label %bb1
306
307bb1:                                              ; preds = %bb1, %bb
308  %tmp2 = load %struct.hoge*, %struct.hoge** @global, align 8
309  %tmp3 = getelementptr inbounds %struct.hoge, %struct.hoge* %tmp2, i64 undef, i32 1
310  store i32 0, i32* %tmp3, align 4
311  %tmp4 = load %struct.hoge*, %struct.hoge** @global, align 8
312  %tmp5 = getelementptr inbounds %struct.hoge, %struct.hoge* %tmp4, i64 undef, i32 1
313  store i32 10, i32* %tmp5, align 4
314  br label %bb1
315}
316
; memcpy intrinsic in its four-operand form (dest, src, length, volatile flag),
; called from @widget.
317declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
318
; Zero-initialized array of ten i16 elements with 1-byte alignment, indexed by
; the loop in @test_loop_carried_dep.
319@x = global [10 x i16] zeroinitializer, align 1
320
321; Make sure we do not eliminate the store in %do.body, because it writes to
322; multiple locations in the loop and the store in %if.end10 only stores to
323; the last one.
; The loop index %i.0 starts at 0 and the loop exits to %if.end10 when it
; equals 4, so %do.body stores 2 to @x[0] through @x[4]. %if.end10 reuses
; %arrayidx2, which on exit addresses only @x[4]. The expected output keeps
; both stores.
324define i16 @test_loop_carried_dep() {
325; CHECK-LABEL: @test_loop_carried_dep(
326; CHECK-NEXT:  entry:
327; CHECK-NEXT:    br label [[DO_BODY:%.*]]
328; CHECK:       do.body:
329; CHECK-NEXT:    [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
330; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
331; CHECK-NEXT:    store i16 2, i16* [[ARRAYIDX2]], align 1
332; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
333; CHECK-NEXT:    br i1 [[EXITCOND]], label [[IF_END10:%.*]], label [[IF_END]]
334; CHECK:       if.end:
335; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[I_0]], 1
336; CHECK-NEXT:    br label [[DO_BODY]]
337; CHECK:       if.end10:
338; CHECK-NEXT:    store i16 1, i16* [[ARRAYIDX2]], align 1
339; CHECK-NEXT:    ret i16 0
340;
341entry:
342  br label %do.body
343
344do.body:                                          ; preds = %if.end, %entry
345  %i.0 = phi i16 [ 0, %entry ], [ %inc, %if.end ]
346  %arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
347  store i16 2, i16* %arrayidx2, align 1
348  %exitcond = icmp eq i16 %i.0, 4
349  br i1 %exitcond, label %if.end10, label %if.end
350
351if.end:                                           ; preds = %do.body
352  %inc = add nuw nsw i16 %i.0, 1
353  br label %do.body
354
355if.end10:                                         ; preds = %do.body
356  store i16 1, i16* %arrayidx2, align 1
357  ret i16 0
358}
359