1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -indvars -S -indvars-predicate-loops=0 | FileCheck %s
3
4; Make sure that indvars can perform LFTR without a canonical IV.
5
6target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
7
8; Perform LFTR using the original pointer-type IV.
9
10declare void @use(double %x)
11
12;  for(char* p = base; p < base + n; ++p) {
13;    *p = p-base;
14;  }
15define void @ptriv(i8* %base, i32 %n) nounwind {
16; CHECK-LABEL: @ptriv(
17; CHECK-NEXT:  entry:
18; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[N:%.*]] to i64
19; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[IDX_EXT]]
20; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i8* [[BASE]], [[ADD_PTR]]
21; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
22; CHECK:       for.body.preheader:
23; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
24; CHECK:       for.body:
25; CHECK-NEXT:    [[P_02:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BASE]], [[FOR_BODY_PREHEADER]] ]
26; CHECK-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i8* [[P_02]] to i64
27; CHECK-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8* [[BASE]] to i64
28; CHECK-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
29; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8
30; CHECK-NEXT:    store i8 [[CONV]], i8* [[P_02]]
31; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_02]], i32 1
32; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[ADD_PTR]]
33; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
34; CHECK:       for.end.loopexit:
35; CHECK-NEXT:    br label [[FOR_END]]
36; CHECK:       for.end:
37; CHECK-NEXT:    ret void
38;
39entry:
40  %idx.ext = sext i32 %n to i64
41  %add.ptr = getelementptr inbounds i8, i8* %base, i64 %idx.ext
42  %cmp1 = icmp ult i8* %base, %add.ptr
43  br i1 %cmp1, label %for.body, label %for.end
44
45for.body:
46  %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ]
47  ; cruft to make the IV useful
48  %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64
49  %sub.ptr.rhs.cast = ptrtoint i8* %base to i64
50  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
51  %conv = trunc i64 %sub.ptr.sub to i8
52  store i8 %conv, i8* %p.02
53  %incdec.ptr = getelementptr inbounds i8, i8* %p.02, i32 1
54  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
55  br i1 %cmp, label %for.body, label %for.end
56
57for.end:
58  ret void
59}
60
61; This test checks that SCEVExpander can handle an outer loop that has been
62; simplified, and as a result the inner loop's exit test will be rewritten.
63define void @expandOuterRecurrence(i32 %arg) nounwind {
64; CHECK-LABEL: @expandOuterRecurrence(
65; CHECK-NEXT:  entry:
66; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
67; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
68; CHECK-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
69; CHECK:       outer.preheader:
70; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG]], -1
71; CHECK-NEXT:    br label [[OUTER:%.*]]
72; CHECK:       outer:
73; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
74; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
75; CHECK-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
76; CHECK-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
77; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
78; CHECK-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
79; CHECK:       inner.ph:
80; CHECK-NEXT:    br label [[INNER:%.*]]
81; CHECK:       inner:
82; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
83; CHECK-NEXT:    [[J_INC]] = add nuw nsw i32 [[J]], 1
84; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
85; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
86; CHECK:       outer.inc.loopexit:
87; CHECK-NEXT:    br label [[OUTER_INC]]
88; CHECK:       outer.inc:
89; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
90; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
91; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
92; CHECK-NEXT:    br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
93; CHECK:       exit.loopexit:
94; CHECK-NEXT:    br label [[EXIT]]
95; CHECK:       exit:
96; CHECK-NEXT:    ret void
97;
98entry:
99  %sub1 = sub nsw i32 %arg, 1
100  %cmp1 = icmp slt i32 0, %sub1
101  br i1 %cmp1, label %outer, label %exit
102
103outer:
104  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
105  %sub2 = sub nsw i32 %arg, %i
106  %sub3 = sub nsw i32 %sub2, 1
107  %cmp2 = icmp slt i32 0, %sub3
108  br i1 %cmp2, label %inner.ph, label %outer.inc
109
110inner.ph:
111  br label %inner
112
113inner:
114  %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
115  %j.inc = add nsw i32 %j, 1
116  %cmp3 = icmp slt i32 %j.inc, %sub3
117  br i1 %cmp3, label %inner, label %outer.inc
118
119outer.inc:
120  %i.inc = add nsw i32 %i, 1
121  %cmp4 = icmp slt i32 %i.inc, %sub1
122  br i1 %cmp4, label %outer, label %exit
123
124exit:
125  ret void
126}
127
128; Force SCEVExpander to look for an existing well-formed phi.
129; Perform LFTR without generating extra preheader code.
130define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
131;
132; CHECK-LABEL: @guardedloop(
133; CHECK-NEXT:  entry:
134; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 1, [[IROW:%.*]]
135; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]]
136; CHECK:       loop.preheader:
137; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[ILEAD:%.*]] to i64
138; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[IROW]] to i64
139; CHECK-NEXT:    br label [[LOOP:%.*]]
140; CHECK:       loop:
141; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ]
142; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
143; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], [[INDVARS_IV2]]
144; CHECK-NEXT:    [[MATRIXP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[MATRIX:%.*]], i32 0, i64 [[TMP1]]
145; CHECK-NEXT:    [[V1:%.*]] = load double, double* [[MATRIXP]]
146; CHECK-NEXT:    call void @use(double [[V1]])
147; CHECK-NEXT:    [[VECTORP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[VECTOR:%.*]], i32 0, i64 [[INDVARS_IV2]]
148; CHECK-NEXT:    [[V2:%.*]] = load double, double* [[VECTORP]]
149; CHECK-NEXT:    call void @use(double [[V2]])
150; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
151; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
152; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]]
153; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN_LOOPEXIT:%.*]]
154; CHECK:       return.loopexit:
155; CHECK-NEXT:    br label [[RETURN]]
156; CHECK:       return:
157; CHECK-NEXT:    ret void
158;
159  i32 %irow, i32 %ilead) nounwind {
160entry:
161  %cmp = icmp slt i32 1, %irow
162  br i1 %cmp, label %loop, label %return
163
164loop:
165  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
166  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
167  %diagidx = add nsw i32 %rowidx, %i
168  %diagidxw = sext i32 %diagidx to i64
169  %matrixp = getelementptr inbounds [0 x double], [0 x double]* %matrix, i32 0, i64 %diagidxw
170  %v1 = load double, double* %matrixp
171  call void @use(double %v1)
172  %iw = sext i32 %i to i64
173  %vectorp = getelementptr inbounds [0 x double], [0 x double]* %vector, i32 0, i64 %iw
174  %v2 = load double, double* %vectorp
175  call void @use(double %v2)
176  %row.inc = add nsw i32 %rowidx, %ilead
177  %i.inc = add nsw i32 %i, 1
178  %cmp196 = icmp slt i32 %i.inc, %irow
179  br i1 %cmp196, label %loop, label %return
180
181return:
182  ret void
183}
184
185; Avoid generating extra code to materialize a trip count. Skip LFTR.
186define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector,
187;
188; CHECK-LABEL: @unguardedloop(
189; CHECK-NEXT:  entry:
190; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[IROW:%.*]], 1
191; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[IROW]], i32 1
192; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64
193; CHECK-NEXT:    br label [[LOOP:%.*]]
194; CHECK:       loop:
195; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
196; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
197; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]]
198; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN:%.*]]
199; CHECK:       return:
200; CHECK-NEXT:    ret void
201;
202  i32 %irow, i32 %ilead) nounwind {
203entry:
204  br label %loop
205
206loop:
207  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
208  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
209  %diagidx = add nsw i32 %rowidx, %i
210  %diagidxw = sext i32 %diagidx to i64
211  %matrixp = getelementptr inbounds [0 x double], [0 x double]* %matrix, i32 0, i64 %diagidxw
212  %v1 = load double, double* %matrixp
213  %iw = sext i32 %i to i64
214  %vectorp = getelementptr inbounds [0 x double], [0 x double]* %vector, i32 0, i64 %iw
215  %v2 = load double, double* %vectorp
216  %row.inc = add nsw i32 %rowidx, %ilead
217  %i.inc = add nsw i32 %i, 1
218  %cmp196 = icmp slt i32 %i.inc, %irow
219  br i1 %cmp196, label %loop, label %return
220
221return:
222  ret void
223}
224
225; Remove %i which is only used by the exit test.
226; Verify that SCEV can still compute a backedge count from the sign
227; extended %n, used for pointer comparison by LFTR.
228;
229; TODO: Fix for PR13371 currently makes this impossible. See
230; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules.
231define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
232; CHECK-LABEL: @geplftr(
233; CHECK-NEXT:  entry:
234; CHECK-NEXT:    [[X_EXT:%.*]] = sext i32 [[X:%.*]] to i64
235; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[X_EXT]]
236; CHECK-NEXT:    [[Y_EXT:%.*]] = sext i32 [[Y:%.*]] to i64
237; CHECK-NEXT:    [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 [[Y_EXT]]
238; CHECK-NEXT:    [[LIM:%.*]] = add i32 [[X]], [[N:%.*]]
239; CHECK-NEXT:    [[CMP_PH:%.*]] = icmp ult i32 [[X]], [[LIM]]
240; CHECK-NEXT:    br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
241; CHECK:       loop.preheader:
242; CHECK-NEXT:    br label [[LOOP:%.*]]
243; CHECK:       loop:
244; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ]
245; CHECK-NEXT:    [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ]
246; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1
247; CHECK-NEXT:    store i8 3, i8* [[APTR]]
248; CHECK-NEXT:    [[INC]] = add nuw i32 [[I]], 1
249; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]]
250; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
251; CHECK:       exit.loopexit:
252; CHECK-NEXT:    br label [[EXIT]]
253; CHECK:       exit:
254; CHECK-NEXT:    ret void
255;
256entry:
257  %x.ext = sext i32 %x to i64
258  %add.ptr = getelementptr inbounds i8, i8* %base, i64 %x.ext
259  %y.ext = sext i32 %y to i64
260  %add.ptr10 = getelementptr inbounds i8, i8* %add.ptr, i64 %y.ext
261  %lim = add i32 %x, %n
262  %cmp.ph = icmp ult i32 %x, %lim
263  br i1 %cmp.ph, label %loop, label %exit
264loop:
265  %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
266  %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ]
267  %incdec.ptr = getelementptr inbounds i8, i8* %aptr, i32 1
268  store i8 3, i8* %aptr
269  %inc = add i32 %i, 1
270  %cmp = icmp ult i32 %inc, %lim
271  br i1 %cmp, label %loop, label %exit
272
273exit:
274  ret void
275}
276
277; Exercise backedge taken count verification with a never-taken loop.
278define void @nevertaken() nounwind uwtable ssp {
279; CHECK-LABEL: @nevertaken(
280; CHECK-NEXT:  entry:
281; CHECK-NEXT:    br label [[LOOP:%.*]]
282; CHECK:       loop:
283; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
284; CHECK:       exit:
285; CHECK-NEXT:    ret void
286;
287entry:
288  br label %loop
289loop:
290  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
291  %inc = add nsw i32 %i, 1
292  %cmp = icmp sle i32 %inc, 0
293  br i1 %cmp, label %loop, label %exit
294
295exit:
296  ret void
297}
298
299; Test LFTR on an IV whose recurrence start is a non-unit pointer type.
300define void @aryptriv([256 x i8]* %base, i32 %n) nounwind {
301; CHECK-LABEL: @aryptriv(
302; CHECK-NEXT:  entry:
303; CHECK-NEXT:    [[IVSTART:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* [[BASE:%.*]], i32 0, i32 0
304; CHECK-NEXT:    [[IVEND:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* [[BASE]], i32 0, i32 [[N:%.*]]
305; CHECK-NEXT:    [[CMP_PH:%.*]] = icmp ult i8* [[IVSTART]], [[IVEND]]
306; CHECK-NEXT:    br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
307; CHECK:       loop.preheader:
308; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[N]] to i64
309; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr [256 x i8], [256 x i8]* [[BASE]], i64 0, i64 [[TMP0]]
310; CHECK-NEXT:    br label [[LOOP:%.*]]
311; CHECK:       loop:
312; CHECK-NEXT:    [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[IVSTART]], [[LOOP_PREHEADER]] ]
313; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1
314; CHECK-NEXT:    store i8 3, i8* [[APTR]]
315; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[SCEVGEP]]
316; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
317; CHECK:       exit.loopexit:
318; CHECK-NEXT:    br label [[EXIT]]
319; CHECK:       exit:
320; CHECK-NEXT:    ret void
321;
322entry:
323  %ivstart = getelementptr inbounds [256 x i8], [256 x i8]* %base, i32 0, i32 0
324  %ivend = getelementptr inbounds [256 x i8], [256 x i8]* %base, i32 0, i32 %n
325  %cmp.ph = icmp ult i8* %ivstart, %ivend
326  br i1 %cmp.ph, label %loop, label %exit
327
328loop:
329  %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ]
330  %incdec.ptr = getelementptr inbounds i8, i8* %aptr, i32 1
331  store i8 3, i8* %aptr
332  %cmp = icmp ult i8* %incdec.ptr, %ivend
333  br i1 %cmp, label %loop, label %exit
334
335exit:
336  ret void
337}
338