1; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
2; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; little-endian, ELF mangling, i64 aligned to 64 bits, f80 to 128 bits, native integer widths 8/16/32/64, 128-bit stack alignment
5
6; Given a loop with an induction variable which is being
7; truncated/extended using casts that had been proven to
8; be redundant under a runtime test, we want to make sure
9; that these casts do not get vectorized/scalarized/widened.
10; This is the case for inductions whose SCEV expression is
11; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
12; can be a result of the IR sequences we check below.
13;
14; See also pr30654.
15;
16
17; Case1: Check the following induction pattern:
18;
19;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
20;  %sext = shl i32 %p.09, 24
21;  %conv = ashr exact i32 %sext, 24
22;  %add = add nsw i32 %conv, %step
23;
24; This is the case in the following code:
25;
26; void doit1(int n, int step) {
27;   int i;
28;   char p = 0;
29;   for (i = 0; i < n; i++) {
30;      a[i] = p;
31;      p = p + step;
32;   }
33; }
34;
35; The "ExtTrunc" IR sequence here is:
36;  "%sext = shl i32 %p.09, 24"
37;  "%conv = ashr exact i32 %sext, 24"
38; We check that it does not appear in the vector loop body, whether
39; we vectorize or scalarize the induction.
40; In the case of widened induction, this means that the induction phi
41; is directly used, without shl/ashr on the way.
42
43; VF8-LABEL: @doit1
44; VF8: vector.body:
45; VF8: %vec.ind = phi <8 x i32>
46; VF8: store <8 x i32> %vec.ind
47; VF8: middle.block:
48
49; VF1-LABEL: @doit1
50; VF1: vector.body:
51; VF1-NOT: %{{.*}} = shl i32
52; VF1: middle.block:
53
54@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16 ; zero-initialized array of 250 i32; the store destination in @doit1 and @doit3
55
56define void @doit1(i32 %n, i32 %step) { ; Case1 input: each iteration stores the shl/ashr-narrowed value of %p.09 into @a, then advances it by %step
57entry:
58  %cmp7 = icmp sgt i32 %n, 0 ; guard: the loop is skipped entirely when %n <= 0
59  br i1 %cmp7, label %for.body.lr.ph, label %for.end
60
61for.body.lr.ph:
62  %wide.trip.count = zext i32 %n to i64 ; only reached when %n > 0, so zero-extension preserves the value
63  br label %for.body
64
65for.body:
66  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; 0,1,2,... counter; indexes @a and controls the exit
67  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; the induction under test ('p' in the C source above)
68  %sext = shl i32 %p.09, 24 ; shl 24 followed by ashr 24 keeps the low 8 bits of %p.09 and sign-extends them back to i32
69  %conv = ashr exact i32 %sext, 24 ; this shl/ashr pair is the "ExtTrunc" sequence the test expects to vanish from the vector loop
70  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
71  store i32 %conv, i32* %arrayidx, align 4 ; a[i] = p
72  %add = add nsw i32 %conv, %step ; recurrence is built on the narrowed %conv, not on the raw phi: "ExtTrunc(%phi) + %step"
73  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
74  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; leave after %n iterations
75  br i1 %exitcond, label %for.end.loopexit, label %for.body
76
77for.end.loopexit:
78  br label %for.end
79
80for.end:
81  ret void
82}
83
84
85; Case2: Another variant of the above pattern is where the induction variable
86; is used only for address computation (i.e. it is a GEP index) and therefore
87; the induction is not vectorized but rather only the step is widened.
88;
89; This is the case in the following code, where the induction variable 'w_ix'
90; is only used to access the array 'in':
91;
92; void doit2(int *in, int *out, size_t size, size_t step)
93; {
94;    int w_ix = 0;
95;    for (size_t offset = 0; offset < size; ++offset)
96;     {
97;        int w = in[w_ix];
98;        out[offset] = w;
99;        w_ix += step;
100;     }
101; }
102;
103; The "ExtTrunc" IR sequence here is similar to the previous case:
104;  "%sext = shl i64 %w_ix.011, 32"
105;  "%idxprom = ashr exact i64 %sext, 32"
106; We check that it does not appear in the vector loop body, whether
107; we widen or scalarize the induction.
108; In the case of widened induction, this means that the induction phi
109; is directly used, without shl/ashr on the way.
110
111; VF8-LABEL: @doit2
112; VF8: vector.body:
113; VF8: %vec.ind = phi <8 x i64>
114; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
115; VF8: middle.block:
116
117; VF1-LABEL: @doit2
118; VF1: vector.body:
119; VF1-NOT: %{{.*}} = shl i64
120; VF1: middle.block:
121;
122
123define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step)  { ; Case2 input: each iteration loads %in at the shl/ashr-narrowed %w_ix.011 and stores it to %out at %offset.010
124entry:
125  %cmp9 = icmp eq i64 %size, 0 ; guard: nothing to do when %size is zero
126  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
127
128for.body.lr.ph:
129  br label %for.body
130
131for.cond.cleanup.loopexit:
132  br label %for.cond.cleanup
133
134for.cond.cleanup:
135  ret void
136
137for.body:
138  %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; the induction under test ('w_ix' in the C source above)
139  %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ; 0,1,2,... counter; indexes %out and controls the exit
140  %sext = shl i64 %w_ix.011, 32 ; shl 32 followed by ashr 32 keeps the low 32 bits and sign-extends them back to i64
141  %idxprom = ashr exact i64 %sext, 32 ; this shl/ashr pair is the "ExtTrunc" sequence the test expects to vanish from the vector loop
142  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom ; the narrowed value is used only as a GEP index
143  %0 = load i32, i32* %arrayidx, align 4 ; w = in[w_ix]
144  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
145  store i32 %0, i32* %arrayidx1, align 4 ; out[offset] = w
146  %add = add i64 %idxprom, %step ; recurrence is built on the narrowed %idxprom, not on the raw phi
147  %inc = add nuw i64 %offset.010, 1
148  %exitcond = icmp eq i64 %inc, %size ; leave after %size iterations
149  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
150}
151
152; Case3: Lastly, check also the following induction pattern:
153;
154;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
155;  %conv = and i32 %p.09, 255
156;  %add = add nsw i32 %conv, %step
157;
158; This is the case in the following code:
159;
160; int a[N];
161; void doit3(int n, int step) {
162;   int i;
163;   unsigned char p = 0;
164;   for (i = 0; i < n; i++) {
165;      a[i] = p;
166;      p = p + step;
167;   }
168; }
169;
170; The "ExtTrunc" IR sequence here is:
171;  "%conv = and i32 %p.09, 255".
172; We check that it does not appear in the vector loop body, whether
173; we vectorize or scalarize the induction.
174
175; VF8-LABEL: @doit3
176; VF8: vector.body:
177; VF8: %vec.ind = phi <8 x i32>
178; VF8: store <8 x i32> %vec.ind
179; VF8: middle.block:
180
181; VF1-LABEL: @doit3
182; VF1: vector.body:
183; VF1-NOT: %{{.*}} = and i32
184; VF1: middle.block:
185
186define void @doit3(i32 %n, i32 %step) { ; Case3 input: each iteration stores the and-255-masked value of %p.09 into @a, then advances it by %step
187entry:
188  %cmp7 = icmp sgt i32 %n, 0 ; guard: the loop is skipped entirely when %n <= 0
189  br i1 %cmp7, label %for.body.lr.ph, label %for.end
190
191for.body.lr.ph:
192  %wide.trip.count = zext i32 %n to i64 ; only reached when %n > 0, so zero-extension preserves the value
193  br label %for.body
194
195for.body:
196  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; 0,1,2,... counter; indexes @a and controls the exit
197  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; the induction under test ('p' in the C source above)
198  %conv = and i32 %p.09, 255 ; masks to the low 8 bits (zero-extended); the "ExtTrunc" sequence the test expects to vanish from the vector loop
199  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
200  store i32 %conv, i32* %arrayidx, align 4 ; a[i] = p
201  %add = add nsw i32 %conv, %step ; recurrence is built on the masked %conv, not on the raw phi
202  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
203  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; leave after %n iterations
204  br i1 %exitcond, label %for.end.loopexit, label %for.body
205
206for.end.loopexit:
207  br label %for.end
208
209for.end:
210  ret void
211}
212