1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -indvars -S | FileCheck %s
3
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
5
6; IV with constant start, preinc and postinc sign extends, with and without NSW.
7; IV rewrite only removes one sext. WidenIVs removes all three.
8define void @postincConstIV(i8* %base, i32 %limit) nounwind {
; Input shape: a single loop with two i32 induction variables, both starting
; at the constant 0:
;   %iv    is stepped by a plain `add` (no wrap flags),
;   %ivnsw is stepped by an `add nsw`.
; Every iteration sign-extends %iv, %postiv and %postivnsw to i64, uses each
; result as an index off %base, and stores a zero byte through the address.
; The back edge is taken while %limit > %iv (signed compare on the
; pre-increment value).
;
; Expected output (assertions below): one i64 induction variable, no sext
; left in the loop body, and an `icmp ne` exit test against a trip count
; computed in the entry block as zext(smax(%limit, 0) + 1).
9; CHECK-LABEL: @postincConstIV(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[LIMIT:%.*]], 0
12; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[LIMIT]], i32 0
13; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
14; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64
15; CHECK-NEXT:    br label [[LOOP:%.*]]
16; CHECK:       loop:
17; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
18; CHECK-NEXT:    [[PREADR:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[INDVARS_IV]]
19; CHECK-NEXT:    store i8 0, i8* [[PREADR]], align 1
20; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
21; CHECK-NEXT:    [[POSTADR:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
22; CHECK-NEXT:    store i8 0, i8* [[POSTADR]], align 1
23; CHECK-NEXT:    [[POSTADRNSW:%.*]] = getelementptr inbounds i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
24; CHECK-NEXT:    store i8 0, i8* [[POSTADRNSW]], align 1
25; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
26; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
27; CHECK:       exit:
28; CHECK-NEXT:    br label [[RETURN:%.*]]
29; CHECK:       return:
30; CHECK-NEXT:    ret void
31;
32entry:
33  br label %loop
34loop:
; Two separate i32 IVs, both seeded with 0 from %entry.
35  %iv = phi i32 [ %postiv, %loop ], [ 0, %entry ]
36  %ivnsw = phi i32 [ %postivnsw, %loop ], [ 0, %entry ]
; Use 1: sext of the pre-increment value %iv.
37  %preofs = sext i32 %iv to i64
38  %preadr = getelementptr i8, i8* %base, i64 %preofs
39  store i8 0, i8* %preadr
; Use 2: sext of the post-increment value; the add carries no wrap flags.
40  %postiv = add i32 %iv, 1
41  %postofs = sext i32 %postiv to i64
42  %postadr = getelementptr i8, i8* %base, i64 %postofs
43  store i8 0, i8* %postadr
; Use 3: sext of the post-increment value of the nsw IV; this GEP is inbounds.
44  %postivnsw = add nsw i32 %ivnsw, 1
45  %postofsnsw = sext i32 %postivnsw to i64
46  %postadrnsw = getelementptr inbounds i8, i8* %base, i64 %postofsnsw
47  store i8 0, i8* %postadrnsw
; Latch test reads %iv (pre-increment), not %postiv.
48  %cond = icmp sgt i32 %limit, %iv
49  br i1 %cond, label %loop, label %exit
50exit:
51  br label %return
52return:
53  ret void
54}
55
56; IV with nonconstant start, preinc and postinc sign extends,
57; with and without NSW.
58; As with postincConstIV, WidenIVs removes all three sexts.
59define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind {
; Input shape: a single loop with two i32 induction variables, both starting
; at the argument %init:
;   %iv    is stepped by a plain `add` (no wrap flags),
;   %ivnsw is stepped by an `add nsw`.
; The entry block only enters the loop when %limit > %init (signed).
; Every iteration sign-extends %iv, %postiv and %postivnsw to i64, uses each
; result as an index off %base, and stores a zero byte through the address.
; The back edge is taken while %limit > %postiv (signed compare on the
; post-increment value).
;
; Expected output (assertions below): a loop.preheader block holding the
; sext of %init and of %limit, one i64 induction variable seeded with
; sext(%init) and stepped by `add nsw` (no nuw), no sext left in the loop
; body, and an `icmp ne` exit test of the incremented IV against sext(%limit).
60; CHECK-LABEL: @postincVarIV(
61; CHECK-NEXT:  entry:
62; CHECK-NEXT:    [[PRECOND:%.*]] = icmp sgt i32 [[LIMIT:%.*]], [[INIT:%.*]]
63; CHECK-NEXT:    br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]]
64; CHECK:       loop.preheader:
65; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INIT]] to i64
66; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMIT]] to i64
67; CHECK-NEXT:    br label [[LOOP:%.*]]
68; CHECK:       loop:
69; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
70; CHECK-NEXT:    [[PREADR:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[INDVARS_IV]]
71; CHECK-NEXT:    store i8 0, i8* [[PREADR]], align 1
72; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
73; CHECK-NEXT:    [[POSTADR:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
74; CHECK-NEXT:    store i8 0, i8* [[POSTADR]], align 1
75; CHECK-NEXT:    [[POSTADRNSW:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
76; CHECK-NEXT:    store i8 0, i8* [[POSTADRNSW]], align 1
77; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
78; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
79; CHECK:       exit:
80; CHECK-NEXT:    br label [[RETURN]]
81; CHECK:       return:
82; CHECK-NEXT:    ret void
83;
84entry:
; Loop guard: skip straight to %return unless %limit > %init.
85  %precond = icmp sgt i32 %limit, %init
86  br i1 %precond, label %loop, label %return
87loop:
; Two separate i32 IVs, both seeded with %init from %entry.
88  %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
89  %ivnsw = phi i32 [ %postivnsw, %loop ], [ %init, %entry ]
; Use 1: sext of the pre-increment value %iv.
90  %preofs = sext i32 %iv to i64
91  %preadr = getelementptr i8, i8* %base, i64 %preofs
92  store i8 0, i8* %preadr
; Use 2: sext of the post-increment value; the add carries no wrap flags.
93  %postiv = add i32 %iv, 1
94  %postofs = sext i32 %postiv to i64
95  %postadr = getelementptr i8, i8* %base, i64 %postofs
96  store i8 0, i8* %postadr
; Use 3: sext of the post-increment value of the nsw IV; unlike the matching
; GEP in @postincConstIV, this one is not inbounds.
97  %postivnsw = add nsw i32 %ivnsw, 1
98  %postofsnsw = sext i32 %postivnsw to i64
99  %postadrnsw = getelementptr i8, i8* %base, i64 %postofsnsw
100  store i8 0, i8* %postadrnsw
; Latch test reads %postiv (post-increment).
101  %cond = icmp sgt i32 %limit, %postiv
102  br i1 %cond, label %loop, label %exit
103exit:
104  br label %return
105return:
106  ret void
107}
108
109; Test sign extend elimination in the inner and outer loop.
110; %outercount is straightforward to widen, besides being in an outer loop.
111; %innercount is currently blocked by lcssa, so is not widened.
112; %inneriv can be widened only after proving it has no signed-overflow
113;   based on the loop test.
114define void @nestedIV(i8* %address, i32 %limit) nounwind {
; Input shape: a two-level loop nest over i32 counters.
;   - The outer loop counts %outercount up from 0 and repeats while the
;     incremented count is < %limit (signed).
;   - The inner loop is entered only when %limitdec (= %limit + -1) is
;     greater than %innercount, and repeats while %limitdec > %innerpostiv.
;   - %innercount is carried around the outer loop through %innercount.merge,
;     which is either the inner loop's final %innerpostiv (via the LCSSA phi
;     in %innerexit) or the unchanged %innercount when the inner loop is
;     skipped.
; Five sign-extended offsets (%ofs1 .. %ofs5) index %address; each address
; receives a zero-byte store.
;
; Expected output (assertions below):
;   - the outer counter becomes an i64 phi stepped by `add nuw nsw`, tested
;     with `icmp ne` against zext(smax(%limit, 1));
;   - %ofs1 is replaced by an `add nsw i64 ..., -1` on the wide outer IV;
;   - %innercount stays an i32 phi;
;   - the inner IV becomes an i64 phi seeded with sext(%innercount) in a new
;     innerloop.preheader block, tested with `icmp ne` against
;     sext(%limitdec), which is computed once in the entry block;
;   - the LCSSA phi becomes i64 and is truncated back to i32 in %innerexit;
;   - the sext feeding %adr5 (%ofs5) is still present.
115; CHECK-LABEL: @nestedIV(
116; CHECK-NEXT:  entry:
117; CHECK-NEXT:    [[LIMITDEC:%.*]] = add i32 [[LIMIT:%.*]], -1
118; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[LIMITDEC]] to i64
119; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[LIMIT]], 1
120; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[LIMIT]], i32 1
121; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64
122; CHECK-NEXT:    br label [[OUTERLOOP:%.*]]
123; CHECK:       outerloop:
124; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[OUTERMERGE:%.*]] ], [ 0, [[ENTRY:%.*]] ]
125; CHECK-NEXT:    [[INNERCOUNT:%.*]] = phi i32 [ [[INNERCOUNT_MERGE:%.*]], [[OUTERMERGE]] ], [ 0, [[ENTRY]] ]
126; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i64 [[INDVARS_IV1]], -1
127; CHECK-NEXT:    [[ADR1:%.*]] = getelementptr i8, i8* [[ADDRESS:%.*]], i64 [[TMP2]]
128; CHECK-NEXT:    store i8 0, i8* [[ADR1]], align 1
129; CHECK-NEXT:    br label [[INNERPREHEADER:%.*]]
130; CHECK:       innerpreheader:
131; CHECK-NEXT:    [[INNERPRECMP:%.*]] = icmp sgt i32 [[LIMITDEC]], [[INNERCOUNT]]
132; CHECK-NEXT:    br i1 [[INNERPRECMP]], label [[INNERLOOP_PREHEADER:%.*]], label [[OUTERMERGE]]
133; CHECK:       innerloop.preheader:
134; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[INNERCOUNT]] to i64
135; CHECK-NEXT:    br label [[INNERLOOP:%.*]]
136; CHECK:       innerloop:
137; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP3]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ]
138; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
139; CHECK-NEXT:    [[ADR2:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV]]
140; CHECK-NEXT:    store i8 0, i8* [[ADR2]], align 1
141; CHECK-NEXT:    [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
142; CHECK-NEXT:    store i8 0, i8* [[ADR3]], align 1
143; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
144; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
145; CHECK:       innerexit:
146; CHECK-NEXT:    [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
147; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
148; CHECK-NEXT:    br label [[OUTERMERGE]]
149; CHECK:       outermerge:
150; CHECK-NEXT:    [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]
151; CHECK-NEXT:    [[ADR4:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV1]]
152; CHECK-NEXT:    store i8 0, i8* [[ADR4]], align 1
153; CHECK-NEXT:    [[OFS5:%.*]] = sext i32 [[INNERCOUNT_MERGE]] to i64
154; CHECK-NEXT:    [[ADR5:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[OFS5]]
155; CHECK-NEXT:    store i8 0, i8* [[ADR5]], align 1
156; CHECK-NEXT:    [[INDVARS_IV_NEXT2]] = add nuw nsw i64 [[INDVARS_IV1]], 1
157; CHECK-NEXT:    [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT2]], [[WIDE_TRIP_COUNT]]
158; CHECK-NEXT:    br i1 [[EXITCOND4]], label [[OUTERLOOP]], label [[RETURN:%.*]]
159; CHECK:       return:
160; CHECK-NEXT:    ret void
161;
162entry:
; %limitdec = %limit - 1; it bounds the inner loop only.
163  %limitdec = add i32 %limit, -1
164  br label %outerloop
165
166; Eliminate %ofs1 after widening outercount.
167; IV rewriting hoists a gep into this block. We don't like that.
168outerloop:
169  %outercount   = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ]
170  %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ]
171
; Store at index sext(%outercount - 1).
172  %outercountdec = add i32 %outercount, -1
173  %ofs1 = sext i32 %outercountdec to i64
174  %adr1 = getelementptr i8, i8* %address, i64 %ofs1
175  store i8 0, i8* %adr1
176
177  br label %innerpreheader
178
179innerpreheader:
; Inner-loop guard: bypass the inner loop unless %limitdec > %innercount.
180  %innerprecmp = icmp sgt i32 %limitdec, %innercount
181  br i1 %innerprecmp, label %innerloop, label %outermerge
182
183; Eliminate %ofs2 after widening inneriv.
184; Eliminate %ofs3 after normalizing sext(innerpostiv)
185; FIXME: We should check that indvars does not increase the number of
186; IVs in this loop. sext elimination plus LFTR currently results in 2 final
187; IVs. Waiting to remove LFTR.
188innerloop:
; The inner IV starts from the current %innercount; its increment carries no
; wrap flags in the input.
189  %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
190  %innerpostiv = add i32 %inneriv, 1
191
192  %ofs2 = sext i32 %inneriv to i64
193  %adr2 = getelementptr i8, i8* %address, i64 %ofs2
194  store i8 0, i8* %adr2
195
196  %ofs3 = sext i32 %innerpostiv to i64
197  %adr3 = getelementptr i8, i8* %address, i64 %ofs3
198  store i8 0, i8* %adr3
199
; Inner latch test reads the post-increment value.
200  %innercmp = icmp sgt i32 %limitdec, %innerpostiv
201  br i1 %innercmp, label %innerloop, label %innerexit
202
203innerexit:
; Single-entry LCSSA phi carrying the inner loop's final %innerpostiv out.
204  %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ]
205  br label %outermerge
206
207; Eliminate %ofs4 after widening outercount
208; TODO: Eliminate %ofs5 after removing lcssa
209outermerge:
; Joins the "inner loop ran" and "inner loop skipped" paths.
210  %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ]
211
212  %ofs4 = sext i32 %outercount to i64
213  %adr4 = getelementptr i8, i8* %address, i64 %ofs4
214  store i8 0, i8* %adr4
215
216  %ofs5 = sext i32 %innercount.merge to i64
217  %adr5 = getelementptr i8, i8* %address, i64 %ofs5
218  store i8 0, i8* %adr5
219
; Outer latch: step %outercount and continue while the new value is < %limit.
220  %outerpostcount = add i32 %outercount, 1
221  %tmp47 = icmp slt i32 %outerpostcount, %limit
222  br i1 %tmp47, label %outerloop, label %return
223
224return:
225  ret void
226}
227