1; RUN: opt -S -loop-fusion < %s | FileCheck %s
2
3@B = common global [1024 x i32] zeroinitializer, align 16
4
5; CHECK: void @dep_free_parametric
6; CHECK-NEXT: entry:
7; CHECK: br i1 %{{.*}}, label %[[LOOP1PREHEADER:bb[0-9]*]], label %[[LOOP1SUCC:bb[0-9]+]]
8; CHECK: [[LOOP1PREHEADER]]
9; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]]
10; CHECK: [[LOOP1BODY]]
11; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]]
12; CHECK: [[LOOP2EXIT]]
13; CHECK: br label %[[LOOP1SUCC]]
14; CHECK: [[LOOP1SUCC]]
15; CHECK: ret void
; Two guarded counted loops in sequence. Each guard tests `0 < %N`, and each
; loop starts its index at 0 and repeats while the incremented index is `< %N`.
; The first loop stores into the noalias argument %A, the second into the
; global @B. The FileCheck directives preceding this function match one guard
; branch, one self-branching loop body, and the returning block.
; NOTE(review): both loops compute `srem <product>, <index>` and the index is 0
; on the first iteration, i.e. a remainder by zero -- presumably irrelevant to
; what this test exercises, but worth confirming.
16define void @dep_free_parametric(i32* noalias %A, i64 %N) {
17entry:
18  %cmp4 = icmp slt i64 0, %N                    ; guard of the first loop: 0 < N
19  br i1 %cmp4, label %bb3, label %bb14          ; skip the first loop when the guard is false
20
21bb3:                               ; preds = %entry  (preheader of the first loop)
22  br label %bb5
23
24bb5:                                         ; preds = %bb3, %bb5  (body of the first loop)
25  %i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ]   ; index: 0 on entry, %inc on the back edge
26  %sub = sub nsw i64 %i.05, 3
27  %add = add nsw i64 %i.05, 3
28  %mul = mul nsw i64 %sub, %add                 ; (i - 3) * (i + 3)
29  %rem = srem i64 %mul, %i.05                   ; remainder by the index itself
30  %conv = trunc i64 %rem to i32
31  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.05
32  store i32 %conv, i32* %arrayidx, align 4      ; A[i] = truncated remainder
33  %inc = add nsw i64 %i.05, 1
34  %cmp = icmp slt i64 %inc, %N
35  br i1 %cmp, label %bb5, label %bb10           ; repeat while i + 1 < N
36
37bb10:                                 ; preds = %bb5  (exit block of the first loop)
38  br label %bb14
39
40bb14:                                          ; preds = %bb10, %entry  (guard of the second loop)
41  %cmp31 = icmp slt i64 0, %N                   ; same predicate and operands as %cmp4
42  br i1 %cmp31, label %bb8, label %bb12
43
44bb8:                              ; preds = %bb14  (preheader of the second loop)
45  br label %bb9
46
47bb9:                                        ; preds = %bb8, %bb9  (body of the second loop)
48  %i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ]
49  %sub7 = sub nsw i64 %i1.02, 3
50  %add8 = add nsw i64 %i1.02, 3
51  %mul9 = mul nsw i64 %sub7, %add8
52  %rem10 = srem i64 %mul9, %i1.02
53  %conv11 = trunc i64 %rem10 to i32
54  %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.02
55  store i32 %conv11, i32* %arrayidx12, align 4  ; B[i] = same computation as the first loop
56  %inc14 = add nsw i64 %i1.02, 1
57  %cmp3 = icmp slt i64 %inc14, %N
58  br i1 %cmp3, label %bb9, label %bb15
59
60bb15:                               ; preds = %bb9  (exit block of the second loop)
61  br label %bb12
62
63bb12:                                        ; preds = %bb15, %bb14
64  ret void
65}
66
67; Test that `%add` is moved from for.second.preheader into for.first.preheader,
68; and that the two loops for.first and for.second are fused.
69
70; CHECK: void @moveinsts_preheader
71; CHECK-LABEL: for.first.guard:
72; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
73; CHECK-LABEL: for.first.preheader:
74; CHECK-NEXT:  %add = add nsw i32 %x, 1
75; CHECK-NEXT:  br label %for.first
76; CHECK-LABEL: for.first:
77; CHECK:   br i1 %cmp.j, label %for.first, label %for.second.exit
78; CHECK-LABEL: for.second.exit:
79; CHECK-NEXT:   br label %for.end
80; CHECK-LABEL: for.end:
81; CHECK-NEXT:   ret void
; Input for the preheader case: two guarded loops that both branch on the same
; %cmp.guard value. The second loop's preheader holds one extra instruction,
; `%add`, which has no users in this function.
82define void @moveinsts_preheader(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
83for.first.guard:
84  %cmp.guard = icmp slt i64 0, %N               ; 0 < N; reused by for.second.guard
85  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
86
87for.first.preheader:
88  br label %for.first
89
90for.first:
91  %i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
92  %Ai = getelementptr inbounds i32, i32* %A, i64 %i
93  store i32 0, i32* %Ai, align 4                ; A[i] = 0
94  %inc.i = add nsw i64 %i, 1
95  %cmp.i = icmp slt i64 %inc.i, %N
96  br i1 %cmp.i, label %for.first, label %for.first.exit
97
98for.first.exit:
99  br label %for.second.guard
100
101for.second.guard:
102  br i1 %cmp.guard, label %for.second.preheader, label %for.end   ; same condition as the first guard
103
104for.second.preheader:
105  %add = add nsw i32 %x, 1                      ; the instruction this test tracks
106  br label %for.second
107
108for.second:
109  %j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
110  %Bj = getelementptr inbounds i32, i32* %B, i64 %j
111  store i32 0, i32* %Bj, align 4                ; B[j] = 0
112  %inc.j = add nsw i64 %j, 1
113  %cmp.j = icmp slt i64 %inc.j, %N
114  br i1 %cmp.j, label %for.second, label %for.second.exit
115
116for.second.exit:
117  br label %for.end
118
119for.end:
120  ret void
121}
122
123; Test that `%add` is moved from for.first.exit into for.second.exit, and that
124; the two loops for.first and for.second are fused.
125
126; CHECK: void @moveinsts_exitblock
127; CHECK-LABEL: for.first.guard:
128; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
129; CHECK-LABEL: for.first.preheader:
130; CHECK-NEXT:  br label %for.first
131; CHECK-LABEL: for.first:
132; CHECK:   br i1 %cmp.j, label %for.first, label %for.second.exit
133; CHECK-LABEL: for.second.exit:
134; CHECK-NEXT:  %add = add nsw i32 %x, 1
135; CHECK-NEXT:   br label %for.end
136; CHECK-LABEL: for.end:
137; CHECK-NEXT:   ret void
; Input for the exit-block case: two guarded loops that both branch on the same
; %cmp.guard value. The first loop's exit block holds one extra instruction,
; `%add`, which has no users in this function.
138define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
139for.first.guard:
140  %cmp.guard = icmp slt i64 0, %N              ; 0 < N; reused by for.second.guard
141  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
142
143for.first.preheader:
144  br label %for.first
145
146for.first:
147  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
148  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
149  store i32 0, i32* %arrayidx, align 4         ; A[i] = 0
150  %inc = add nsw i64 %i.04, 1
151  %cmp = icmp slt i64 %inc, %N
152  br i1 %cmp, label %for.first, label %for.first.exit
153
154for.first.exit:
155  %add = add nsw i32 %x, 1                     ; the instruction this test tracks
156  br label %for.second.guard
157
158for.second.guard:
159  br i1 %cmp.guard, label %for.second.preheader, label %for.end   ; same condition as the first guard
160
161for.second.preheader:
162  br label %for.second
163
164for.second:
165  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
166  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
167  store i32 0, i32* %arrayidx4, align 4        ; B[j] = 0
168  %inc6 = add nsw i64 %j.02, 1
169  %cmp.j = icmp slt i64 %inc6, %N
170  br i1 %cmp.j, label %for.second, label %for.second.exit
171
172for.second.exit:
173  br label %for.end
174
175for.end:
176  ret void
177}
178
179; Test that `%add` is moved from for.second.guard into for.first.guard, and that
180; the two loops for.first and for.second are fused.
181
182; CHECK: void @moveinsts_guardblock
183; CHECK-LABEL: for.first.guard:
184; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
185; CHECK-NEXT:  %add = add nsw i32 %x, 1
186; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
187; CHECK-LABEL: for.first.preheader:
188; CHECK-NEXT:  br label %for.first
189; CHECK-LABEL: for.first:
190; CHECK:   br i1 %cmp.j, label %for.first, label %for.second.exit
191; CHECK-LABEL: for.second.exit:
192; CHECK-NEXT:   br label %for.end
193; CHECK-LABEL: for.end:
194; CHECK-NEXT:   ret void
; Input for the guard-block case: two guarded loops that both branch on the same
; %cmp.guard value. The second guard block holds one extra instruction, `%add`,
; ahead of its branch; `%add` has no users in this function.
195define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
196for.first.guard:
197  %cmp.guard = icmp slt i64 0, %N              ; 0 < N; reused by for.second.guard
198  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
199
200for.first.preheader:
201  br label %for.first
202
203for.first:
204  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
205  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
206  store i32 0, i32* %arrayidx, align 4         ; A[i] = 0
207  %inc = add nsw i64 %i.04, 1
208  %cmp = icmp slt i64 %inc, %N
209  br i1 %cmp, label %for.first, label %for.first.exit
210
211for.first.exit:
212  br label %for.second.guard
213
214for.second.guard:
215  %add = add nsw i32 %x, 1                     ; the instruction this test tracks
216  br i1 %cmp.guard, label %for.second.preheader, label %for.end   ; same condition as the first guard
217
218for.second.preheader:
219  br label %for.second
220
221for.second:
222  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
223  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
224  store i32 0, i32* %arrayidx4, align 4        ; B[j] = 0
225  %inc6 = add nsw i64 %j.02, 1
226  %cmp.j = icmp slt i64 %inc6, %N
227  br i1 %cmp.j, label %for.second, label %for.second.exit
228
229for.second.exit:
230  br label %for.end
231
232for.end:
233  ret void
234}
235
236; Test that the incoming block of `%j.lcssa` is updated correctly
237; from for.second.guard to for.first.guard, and the two loops for.first and
238; for.second are fused.
239
240; CHECK: i64 @updatephi_guardnonloopblock
241; CHECK-LABEL: for.first.guard:
242; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
243; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
244; CHECK-LABEL: for.first.preheader:
245; CHECK-NEXT:  br label %for.first
246; CHECK-LABEL: for.first:
247; CHECK:   br i1 %cmp.j, label %for.first, label %for.second.exit
248; CHECK-LABEL: for.second.exit:
249; CHECK-NEXT:   br label %for.end
250; CHECK-LABEL: for.end:
251; CHECK-NEXT:   %j.lcssa = phi i64 [ 0, %for.first.guard ], [ %j.02, %for.second.exit ]
252; CHECK-NEXT:   ret i64 %j.lcssa
253
; Input for the phi-update case: two guarded loops that both branch on the same
; %cmp.guard value. The function returns a phi in for.end whose incoming blocks
; are the second guard (value 0) and the second loop's exit block (value %j.02).
254define i64 @updatephi_guardnonloopblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
255for.first.guard:
256  %cmp.guard = icmp slt i64 0, %N              ; 0 < N; reused by for.second.guard
257  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
258
259for.first.preheader:
260  br label %for.first
261
262for.first:
263  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
264  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
265  store i32 0, i32* %arrayidx, align 4         ; A[i] = 0
266  %inc = add nsw i64 %i.04, 1
267  %cmp = icmp slt i64 %inc, %N
268  br i1 %cmp, label %for.first, label %for.first.exit
269
270for.first.exit:
271  br label %for.second.guard
272
273for.second.guard:
274  br i1 %cmp.guard, label %for.second.preheader, label %for.end   ; false edge feeds the phi in for.end
275
276for.second.preheader:
277  br label %for.second
278
279for.second:
280  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
281  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
282  store i32 0, i32* %arrayidx4, align 4        ; B[j] = 0
283  %inc6 = add nsw i64 %j.02, 1
284  %cmp.j = icmp slt i64 %inc6, %N
285  br i1 %cmp.j, label %for.second, label %for.second.exit
286
287for.second.exit:
288  br label %for.end
289
290for.end:
291  %j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ]   ; 0 if the second loop was skipped, else its last index
292  ret i64 %j.lcssa
293}
294