1; RUN: opt < %s -passes='print<loopnest>' -disable-output 2>&1 | FileCheck %s
2
3; Test a perfect 2-dim loop nest of the form:
4;   for(i=0; i<nx; ++i)
5;     for(j=0; j<nx; ++j)
6;       y[i][j] = x[i][j];
7
8define void @perf_nest_2D_1(i32** %y, i32** %x, i64 signext %nx, i64 signext %ny) {
9; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_1_loop_j, Loops: ( perf_nest_2D_1_loop_j )
10; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_1_loop_i, Loops: ( perf_nest_2D_1_loop_i perf_nest_2D_1_loop_j )
11entry:
12  br label %perf_nest_2D_1_loop_i
13
14perf_nest_2D_1_loop_i:
15  %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ]
16  %cmp21 = icmp slt i64 0, %ny
17  br i1 %cmp21, label %perf_nest_2D_1_loop_j, label %inc_i
18
19perf_nest_2D_1_loop_j:
20  %j = phi i64 [ 0, %perf_nest_2D_1_loop_i ], [ %inc, %inc_j ]
21  %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j
22  %0 = load i32*, i32** %arrayidx, align 8
23  %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j
24  %1 = load i32, i32* %arrayidx6, align 4
25  %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j
26  %2 = load i32*, i32** %arrayidx8, align 8
27  %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i
28  store i32 %1, i32* %arrayidx11, align 4
29  br label %inc_j
30
31inc_j:
32  %inc = add nsw i64 %j, 1
33  %cmp2 = icmp slt i64 %inc, %ny
34  br i1 %cmp2, label %perf_nest_2D_1_loop_j, label %inc_i
35
36inc_i:
37  %inc13 = add nsw i64 %i, 1
38  %cmp = icmp slt i64 %inc13, %nx
39  br i1 %cmp, label %perf_nest_2D_1_loop_i, label %perf_nest_2D_1_loop_i_end
40
41perf_nest_2D_1_loop_i_end:
42  ret void
43}
44
45; Test a perfect 2-dim loop nest of the form:
46;   for (i=0; i<100; ++i)
47;     for (j=0; j<100; ++j)
48;       y[i][j] = x[i][j];
49define void @perf_nest_2D_2(i32** %y, i32** %x) {
50; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_2D_2_loop_j, Loops: ( perf_nest_2D_2_loop_j )
51; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_2D_2_loop_i, Loops: ( perf_nest_2D_2_loop_i perf_nest_2D_2_loop_j )
52entry:
53  br label %perf_nest_2D_2_loop_i
54
55perf_nest_2D_2_loop_i:
56  %i = phi i64 [ 0, %entry ], [ %inc13, %inc_i ]
57  br label %perf_nest_2D_2_loop_j
58
59perf_nest_2D_2_loop_j:
60  %j = phi i64 [ 0, %perf_nest_2D_2_loop_i ], [ %inc, %inc_j ]
61  %arrayidx = getelementptr inbounds i32*, i32** %x, i64 %j
62  %0 = load i32*, i32** %arrayidx, align 8
63  %arrayidx6 = getelementptr inbounds i32, i32* %0, i64 %j
64  %1 = load i32, i32* %arrayidx6, align 4
65  %arrayidx8 = getelementptr inbounds i32*, i32** %y, i64 %j
66  %2 = load i32*, i32** %arrayidx8, align 8
67  %arrayidx11 = getelementptr inbounds i32, i32* %2, i64 %i
68  store i32 %1, i32* %arrayidx11, align 4
69  br label %inc_j
70
71inc_j:
72  %inc = add nsw i64 %j, 1
73  %cmp2 = icmp slt i64 %inc, 100
74  br i1 %cmp2, label %perf_nest_2D_2_loop_j, label %loop_j_end
75
76loop_j_end:
77  br label %inc_i
78
79inc_i:
80  %inc13 = add nsw i64 %i, 1
81  %cmp = icmp slt i64 %inc13, 100
82  br i1 %cmp, label %perf_nest_2D_2_loop_i, label %perf_nest_2D_2_loop_i_end
83
84perf_nest_2D_2_loop_i_end:
85  ret void
86}
87
88; Test a perfect 3-dim loop nest of the form:
89;   for (i=0; i<nx; ++i)
90;     for (j=0; j<ny; ++j)
91;       for (k=0; j<nk; ++k)
92;          y[j][j][k] = x[i][j][k];
93;
94
95define void @perf_nest_3D_1(i32*** %y, i32*** %x, i32 signext %nx, i32 signext %ny, i32 signext %nk) {
96; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_3D_1_loop_k, Loops: ( perf_nest_3D_1_loop_k )
97; CHECK-NEXT: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_3D_1_loop_j, Loops: ( perf_nest_3D_1_loop_j perf_nest_3D_1_loop_k )
98; CHECK-NEXT: IsPerfect=true, Depth=3, OutermostLoop: perf_nest_3D_1_loop_i, Loops: ( perf_nest_3D_1_loop_i perf_nest_3D_1_loop_j perf_nest_3D_1_loop_k )
99entry:
100  br label %perf_nest_3D_1_loop_i
101
102perf_nest_3D_1_loop_i:
103  %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ]
104  %cmp21 = icmp slt i32 0, %ny
105  br i1 %cmp21, label %perf_nest_3D_1_loop_j, label %for.inci
106
107perf_nest_3D_1_loop_j:
108  %j = phi i32 [ 0, %perf_nest_3D_1_loop_i ], [ %incj, %for.incj ]
109  %cmp22 = icmp slt i32 0, %nk
110  br i1 %cmp22, label %perf_nest_3D_1_loop_k, label %for.incj
111
112perf_nest_3D_1_loop_k:
113  %k = phi i32 [ 0, %perf_nest_3D_1_loop_j ], [ %inck, %for.inck ]
114  %idxprom = sext i32 %i to i64
115  %arrayidx = getelementptr inbounds i32**, i32*** %x, i64 %idxprom
116  %0 = load i32**, i32*** %arrayidx, align 8
117  %idxprom7 = sext i32 %j to i64
118  %arrayidx8 = getelementptr inbounds i32*, i32** %0, i64 %idxprom7
119  %1 = load i32*, i32** %arrayidx8, align 8
120  %idxprom9 = sext i32 %k to i64
121  %arrayidx10 = getelementptr inbounds i32, i32* %1, i64 %idxprom9
122  %2 = load i32, i32* %arrayidx10, align 4
123  %idxprom11 = sext i32 %j to i64
124  %arrayidx12 = getelementptr inbounds i32**, i32*** %y, i64 %idxprom11
125  %3 = load i32**, i32*** %arrayidx12, align 8
126  %idxprom13 = sext i32 %j to i64
127  %arrayidx14 = getelementptr inbounds i32*, i32** %3, i64 %idxprom13
128  %4 = load i32*, i32** %arrayidx14, align 8
129  %idxprom15 = sext i32 %k to i64
130  %arrayidx16 = getelementptr inbounds i32, i32* %4, i64 %idxprom15
131  store i32 %2, i32* %arrayidx16, align 4
132  br label %for.inck
133
134for.inck:
135  %inck = add nsw i32 %k, 1
136  %cmp5 = icmp slt i32 %inck, %nk
137  br i1 %cmp5, label %perf_nest_3D_1_loop_k, label %for.incj
138
139for.incj:
140  %incj = add nsw i32 %j, 1
141  %cmp2 = icmp slt i32 %incj, %ny
142  br i1 %cmp2, label %perf_nest_3D_1_loop_j, label %for.inci
143
144for.inci:
145  %inci = add nsw i32 %i, 1
146  %cmp = icmp slt i32 %inci, %nx
147  br i1 %cmp, label %perf_nest_3D_1_loop_i, label %perf_nest_3D_1_loop_i_end
148
149perf_nest_3D_1_loop_i_end:
150  ret void
151}
152
153; Test a perfect 3-dim loop nest of the form:
154;   for (i=0; i<100; ++i)
155;     for (j=0; j<100; ++j)
156;       for (k=0; j<100; ++k)
157;          y[j][j][k] = x[i][j][k];
158;
159
160define void @perf_nest_3D_2(i32*** %y, i32*** %x) {
161; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_3D_2_loop_k, Loops: ( perf_nest_3D_2_loop_k )
162; CHECK-NEXT: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_3D_2_loop_j, Loops: ( perf_nest_3D_2_loop_j perf_nest_3D_2_loop_k )
163; CHECK-NEXT: IsPerfect=true, Depth=3, OutermostLoop: perf_nest_3D_2_loop_i, Loops: ( perf_nest_3D_2_loop_i perf_nest_3D_2_loop_j perf_nest_3D_2_loop_k )
164entry:
165  br label %perf_nest_3D_2_loop_i
166
167perf_nest_3D_2_loop_i:
168  %i = phi i32 [ 0, %entry ], [ %inci, %for.inci ]
169  br label %perf_nest_3D_2_loop_j
170
171perf_nest_3D_2_loop_j:
172  %j = phi i32 [ 0, %perf_nest_3D_2_loop_i ], [ %incj, %for.incj ]
173  br label %perf_nest_3D_2_loop_k
174
175perf_nest_3D_2_loop_k:
176  %k = phi i32 [ 0, %perf_nest_3D_2_loop_j ], [ %inck, %for.inck ]
177  %idxprom = sext i32 %i to i64
178  %arrayidx = getelementptr inbounds i32**, i32*** %x, i64 %idxprom
179  %0 = load i32**, i32*** %arrayidx, align 8
180  %idxprom7 = sext i32 %j to i64
181  %arrayidx8 = getelementptr inbounds i32*, i32** %0, i64 %idxprom7
182  %1 = load i32*, i32** %arrayidx8, align 8
183  %idxprom9 = sext i32 %k to i64
184  %arrayidx10 = getelementptr inbounds i32, i32* %1, i64 %idxprom9
185  %2 = load i32, i32* %arrayidx10, align 4
186  %idxprom11 = sext i32 %j to i64
187  %arrayidx12 = getelementptr inbounds i32**, i32*** %y, i64 %idxprom11
188  %3 = load i32**, i32*** %arrayidx12, align 8
189  %idxprom13 = sext i32 %j to i64
190  %arrayidx14 = getelementptr inbounds i32*, i32** %3, i64 %idxprom13
191  %4 = load i32*, i32** %arrayidx14, align 8
192  %idxprom15 = sext i32 %k to i64
193  %arrayidx16 = getelementptr inbounds i32, i32* %4, i64 %idxprom15
194  store i32 %2, i32* %arrayidx16, align 4
195  br label %for.inck
196
197for.inck:
198  %inck = add nsw i32 %k, 1
199  %cmp5 = icmp slt i32 %inck, 100
200  br i1 %cmp5, label %perf_nest_3D_2_loop_k, label %loop_k_end
201
202loop_k_end:
203  br label %for.incj
204
205for.incj:
206  %incj = add nsw i32 %j, 1
207  %cmp2 = icmp slt i32 %incj, 100
208  br i1 %cmp2, label %perf_nest_3D_2_loop_j, label %loop_j_end
209
210loop_j_end:
211  br label %for.inci
212
213for.inci:
214  %inci = add nsw i32 %i, 1
215  %cmp = icmp slt i32 %inci, 100
216  br i1 %cmp, label %perf_nest_3D_2_loop_i, label %perf_nest_3D_2_loop_i_end
217
218perf_nest_3D_2_loop_i_end:
219  ret void
220}
221
222; Test a perfect loop nest with a live out reduction:
223;   for (i = 0; i<ni; ++i)
224;     if (0<nj) { // guard branch for the j-loop
225;       for (j=0; j<nj; j+=1)
226;         x+=(i+j);
227;     }
228;   return x;
229
230define signext i32 @perf_nest_live_out(i32 signext %x, i32 signext %ni, i32 signext %nj) {
231; CHECK-LABEL: IsPerfect=true, Depth=1, OutermostLoop: perf_nest_live_out_loop_j, Loops: ( perf_nest_live_out_loop_j )
232; CHECK-LABEL: IsPerfect=true, Depth=2, OutermostLoop: perf_nest_live_out_loop_i, Loops: ( perf_nest_live_out_loop_i perf_nest_live_out_loop_j )
233entry:
234  %cmp4 = icmp slt i32 0, %ni
235  br i1 %cmp4, label %perf_nest_live_out_loop_i.lr.ph, label %for.end7
236
237perf_nest_live_out_loop_i.lr.ph:
238  br label %perf_nest_live_out_loop_i
239
240perf_nest_live_out_loop_i:
241  %x.addr.06 = phi i32 [ %x, %perf_nest_live_out_loop_i.lr.ph ], [ %x.addr.1.lcssa, %for.inc5 ]
242  %i.05 = phi i32 [ 0, %perf_nest_live_out_loop_i.lr.ph ], [ %inc6, %for.inc5 ]
243  %cmp21 = icmp slt i32 0, %nj
244  br i1 %cmp21, label %perf_nest_live_out_loop_j.lr.ph, label %for.inc5
245
246perf_nest_live_out_loop_j.lr.ph:
247  br label %perf_nest_live_out_loop_j
248
249perf_nest_live_out_loop_j:
250  %x.addr.13 = phi i32 [ %x.addr.06, %perf_nest_live_out_loop_j.lr.ph ], [ %add4, %perf_nest_live_out_loop_j ]
251  %j.02 = phi i32 [ 0, %perf_nest_live_out_loop_j.lr.ph ], [ %inc, %perf_nest_live_out_loop_j ]
252  %add = add nsw i32 %i.05, %j.02
253  %add4 = add nsw i32 %x.addr.13, %add
254  %inc = add nsw i32 %j.02, 1
255  %cmp2 = icmp slt i32 %inc, %nj
256  br i1 %cmp2, label %perf_nest_live_out_loop_j, label %for.cond1.for.inc5_crit_edge
257
258for.cond1.for.inc5_crit_edge:
259  %split = phi i32 [ %add4, %perf_nest_live_out_loop_j ]
260  br label %for.inc5
261
262for.inc5:
263  %x.addr.1.lcssa = phi i32 [ %split, %for.cond1.for.inc5_crit_edge ], [ %x.addr.06, %perf_nest_live_out_loop_i ]
264  %inc6 = add nsw i32 %i.05, 1
265  %cmp = icmp slt i32 %inc6, %ni
266  br i1 %cmp, label %perf_nest_live_out_loop_i, label %for.cond.for.end7_crit_edge
267
268for.cond.for.end7_crit_edge:
269  %split7 = phi i32 [ %x.addr.1.lcssa, %for.inc5 ]
270  br label %for.end7
271
272for.end7:
273  %x.addr.0.lcssa = phi i32 [ %split7, %for.cond.for.end7_crit_edge ], [ %x, %entry ]
274  ret i32 %x.addr.0.lcssa
275}
276