; RUN: opt < %s -loop-reroll -S | FileCheck %s
; Tests for the -loop-reroll pass: the module is run through opt and the
; textual output is matched against the FileCheck patterns placed next to
; each function.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; C-like pseudocode for @bar:
; int foo(int a);
; void bar(int *x) {
;   for (int i = 0; i < 500; i += 3) {
;     foo(i);
;     foo(i+1);
;     foo(i+2);
;   }
; }
;
; Each iteration calls @foo on i, i+1 and i+2 and advances i by 3. The
; patterns below expect a loop with a single call on a unit-step induction
; variable.

; Function Attrs: nounwind uwtable
define void @bar(i32* nocapture readnone %x) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %call = tail call i32 @foo(i32 %i.08) #1
  %add = add nsw i32 %i.08, 1
  %call1 = tail call i32 @foo(i32 %add) #1
  %add2 = add nsw i32 %i.08, 2
  %call3 = tail call i32 @foo(i32 %add2) #1
  %add3 = add nsw i32 %i.08, 3
  %exitcond = icmp sge i32 %add3, 500
  br i1 %exitcond, label %for.end, label %for.body

; The last iteration of the input loop runs with i = 498 and calls @foo on
; 498, 499 and 500, so the expected single-call loop exits when %indvar is 500.

; CHECK-LABEL: @bar

; CHECK: for.body:
; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
; CHECK: %indvar.next = add i32 %indvar, 1
; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
; CHECK: br i1 %exitcond1, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; External callee used by the test functions in this file; only declared here.
declare i32 @foo(i32)

; C-like pseudocode for @hi1:
; void hi1(int *x) {
;   for (int i = 0; i < 1500; i += 3) {
;     x[i] = foo(0);
;     x[i+1] = foo(0);
;     x[i+2] = foo(0);
;   }
; }
;
; Three call+store groups per iteration at indices iv, iv+1 and iv+2, with the
; induction variable stepping by 3. The patterns below expect one call+store
; group and a unit-step induction variable.

; Function Attrs: nounwind uwtable
define void @hi1(i32* nocapture %x) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %call = tail call i32 @foo(i32 0) #1
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
  store i32 %call, i32* %arrayidx, align 4
  %call1 = tail call i32 @foo(i32 0) #1
  %0 = add nsw i64 %indvars.iv, 1
  %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %0
  store i32 %call1, i32* %arrayidx3, align 4
  %call4 = tail call i32 @foo(i32 0) #1
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %1
  store i32 %call4, i32* %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
  ; The exit test is performed on the 32-bit truncation of the 64-bit counter.
  %2 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %2, 1500
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @hi1

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %0 = trunc i64 %indvar to i32
; CHECK: %call = tail call i32 @foo(i32 0) #1
; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i32 %0, 1499
; CHECK: br i1 %exitcond, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @hi2:
; void hi2(int *x) {
;   for (int i = 0; i < 500; ++i) {
;     x[3*i] = foo(0);
;     x[3*i+1] = foo(0);
;     x[3*i+2] = foo(0);
;   }
; }
;
; Unlike @hi1, the induction variable already steps by 1 and the store indices
; are formed from 3*iv. The patterns below expect the multiply to disappear
; and the single remaining store to be indexed directly by %indvars.iv.

; Function Attrs: nounwind uwtable
define void @hi2(i32* nocapture %x) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %call = tail call i32 @foo(i32 0) #1
  %0 = mul nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
  store i32 %call, i32* %arrayidx, align 4
  %call1 = tail call i32 @foo(i32 0) #1
  %1 = add nsw i64 %0, 1
  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
  store i32 %call1, i32* %arrayidx4, align 4
  %call5 = tail call i32 @foo(i32 0) #1
  %2 = add nsw i64 %0, 2
  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
  store i32 %call5, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @hi2

; CHECK: for.body:
; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK: %call = tail call i32 @foo(i32 0) #1
; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
; CHECK: store i32 %call, i32* %arrayidx, align 4
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK: br i1 %exitcond1, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @goo:
; void goo(float alpha, float *a, float *b) {
;   for (int i = 0; i < 3200; i += 5) {
;     a[i] += alpha * b[i];
;     a[i + 1] += alpha * b[i + 1];
;     a[i + 2] += alpha * b[i + 2];
;     a[i + 3] += alpha * b[i + 3];
;     a[i + 4] += alpha * b[i + 4];
;   }
; }
;
; Five load/fmul/load/fadd/store groups per iteration at offsets 0..4, with
; the induction variable stepping by 5. The patterns below expect a single
; group and a unit-step induction variable.

; Function Attrs: nounwind uwtable
define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %mul = fmul float %0, %alpha
  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %add = fadd float %1, %mul
  store float %add, float* %arrayidx2, align 4
  %2 = add nsw i64 %indvars.iv, 1
  %arrayidx5 = getelementptr inbounds float, float* %b, i64 %2
  %3 = load float, float* %arrayidx5, align 4
  %mul6 = fmul float %3, %alpha
  %arrayidx9 = getelementptr inbounds float, float* %a, i64 %2
  %4 = load float, float* %arrayidx9, align 4
  %add10 = fadd float %4, %mul6
  store float %add10, float* %arrayidx9, align 4
  %5 = add nsw i64 %indvars.iv, 2
  %arrayidx13 = getelementptr inbounds float, float* %b, i64 %5
  %6 = load float, float* %arrayidx13, align 4
  %mul14 = fmul float %6, %alpha
  %arrayidx17 = getelementptr inbounds float, float* %a, i64 %5
  %7 = load float, float* %arrayidx17, align 4
  %add18 = fadd float %7, %mul14
  store float %add18, float* %arrayidx17, align 4
  %8 = add nsw i64 %indvars.iv, 3
  %arrayidx21 = getelementptr inbounds float, float* %b, i64 %8
  %9 = load float, float* %arrayidx21, align 4
  %mul22 = fmul float %9, %alpha
  %arrayidx25 = getelementptr inbounds float, float* %a, i64 %8
  %10 = load float, float* %arrayidx25, align 4
  %add26 = fadd float %10, %mul22
  store float %add26, float* %arrayidx25, align 4
  %11 = add nsw i64 %indvars.iv, 4
  %arrayidx29 = getelementptr inbounds float, float* %b, i64 %11
  %12 = load float, float* %arrayidx29, align 4
  %mul30 = fmul float %12, %alpha
  %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
  %13 = load float, float* %arrayidx33, align 4
  %add34 = fadd float %13, %mul30
  store float %add34, float* %arrayidx33, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
  %14 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %14, 3200
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @goo

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %0 = trunc i64 %indvar to i32
; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar
; CHECK: %1 = load float, float* %arrayidx, align 4
; CHECK: %mul = fmul float %1, %alpha
; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar
; CHECK: %2 = load float, float* %arrayidx2, align 4
; CHECK: %add = fadd float %2, %mul
; CHECK: store float %add, float* %arrayidx2, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i32 %0, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @hoo:
; void hoo(float alpha, float *a, float *b, int *ip) {
;   for (int i = 0; i < 3200; i += 5) {
;     a[i] += alpha * b[ip[i]];
;     a[i + 1] += alpha * b[ip[i + 1]];
;     a[i + 2] += alpha * b[ip[i + 2]];
;     a[i + 3] += alpha * b[ip[i + 3]];
;     a[i + 4] += alpha * b[ip[i + 4]];
;   }
; }
;
; Same shape as @goo, except that the index into %b is itself loaded from %ip
; and sign-extended. The patterns below expect one group per iteration.

; Function Attrs: nounwind uwtable
define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %idxprom1 = sext i32 %0 to i64
  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
  %1 = load float, float* %arrayidx2, align 4
  %mul = fmul float %1, %alpha
  %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
  %2 = load float, float* %arrayidx4, align 4
  %add = fadd float %2, %mul
  store float %add, float* %arrayidx4, align 4
  %3 = add nsw i64 %indvars.iv, 1
  %arrayidx7 = getelementptr inbounds i32, i32* %ip, i64 %3
  %4 = load i32, i32* %arrayidx7, align 4
  %idxprom8 = sext i32 %4 to i64
  %arrayidx9 = getelementptr inbounds float, float* %b, i64 %idxprom8
  %5 = load float, float* %arrayidx9, align 4
  %mul10 = fmul float %5, %alpha
  %arrayidx13 = getelementptr inbounds float, float* %a, i64 %3
  %6 = load float, float* %arrayidx13, align 4
  %add14 = fadd float %6, %mul10
  store float %add14, float* %arrayidx13, align 4
  %7 = add nsw i64 %indvars.iv, 2
  %arrayidx17 = getelementptr inbounds i32, i32* %ip, i64 %7
  %8 = load i32, i32* %arrayidx17, align 4
  %idxprom18 = sext i32 %8 to i64
  %arrayidx19 = getelementptr inbounds float, float* %b, i64 %idxprom18
  %9 = load float, float* %arrayidx19, align 4
  %mul20 = fmul float %9, %alpha
  %arrayidx23 = getelementptr inbounds float, float* %a, i64 %7
  %10 = load float, float* %arrayidx23, align 4
  %add24 = fadd float %10, %mul20
  store float %add24, float* %arrayidx23, align 4
  %11 = add nsw i64 %indvars.iv, 3
  %arrayidx27 = getelementptr inbounds i32, i32* %ip, i64 %11
  %12 = load i32, i32* %arrayidx27, align 4
  %idxprom28 = sext i32 %12 to i64
  %arrayidx29 = getelementptr inbounds float, float* %b, i64 %idxprom28
  %13 = load float, float* %arrayidx29, align 4
  %mul30 = fmul float %13, %alpha
  %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
  %14 = load float, float* %arrayidx33, align 4
  %add34 = fadd float %14, %mul30
  store float %add34, float* %arrayidx33, align 4
  %15 = add nsw i64 %indvars.iv, 4
  %arrayidx37 = getelementptr inbounds i32, i32* %ip, i64 %15
  %16 = load i32, i32* %arrayidx37, align 4
  %idxprom38 = sext i32 %16 to i64
  %arrayidx39 = getelementptr inbounds float, float* %b, i64 %idxprom38
  %17 = load float, float* %arrayidx39, align 4
  %mul40 = fmul float %17, %alpha
  %arrayidx43 = getelementptr inbounds float, float* %a, i64 %15
  %18 = load float, float* %arrayidx43, align 4
  %add44 = fadd float %18, %mul40
  store float %add44, float* %arrayidx43, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
  %19 = trunc i64 %indvars.iv.next to i32
  %cmp = icmp slt i32 %19, 3200
  br i1 %cmp, label %for.body, label %for.end

; CHECK-LABEL: @hoo

; CHECK: for.body:
; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %0 = trunc i64 %indvar to i32
; CHECK: %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvar
; CHECK: %1 = load i32, i32* %arrayidx, align 4
; CHECK: %idxprom1 = sext i32 %1 to i64
; CHECK: %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
; CHECK: %2 = load float, float* %arrayidx2, align 4
; CHECK: %mul = fmul float %2, %alpha
; CHECK: %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvar
; CHECK: %3 = load float, float* %arrayidx4, align 4
; CHECK: %add = fadd float %3, %mul
; CHECK: store float %add, float* %arrayidx4, align 4
; CHECK: %indvar.next = add i64 %indvar, 1
; CHECK: %exitcond = icmp eq i32 %0, 3199
; CHECK: br i1 %exitcond, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @multi1:
; void multi1(int *x) {
;   y = foo(0)
;   for (int i = 0; i < 500; ++i) {
;     x[3*i] = y;
;     x[3*i+1] = y;
;     x[3*i+2] = y;
;     x[3*i+6] = y;
;     x[3*i+7] = y;
;     x[3*i+8] = y;
;   }
; }
;
; Two groups of three stores per iteration: one at 3*iv + {0,1,2} and one at
; 3*iv + {6,7,8}. The patterns below expect two stores per iteration, at
; %indvars.iv and at %indvars.iv + 6.

; Function Attrs: nounwind uwtable
define void @multi1(i32* nocapture %x) #0 {
entry:
  ; The stored value is computed once, outside the loop.
  %call = tail call i32 @foo(i32 0) #1
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = mul nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
  store i32 %call, i32* %arrayidx, align 4
  %1 = add nsw i64 %0, 1
  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
  store i32 %call, i32* %arrayidx4, align 4
  %2 = add nsw i64 %0, 2
  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
  store i32 %call, i32* %arrayidx9, align 4
  %3 = add nsw i64 %0, 6
  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %3
  store i32 %call, i32* %arrayidx6, align 4
  %4 = add nsw i64 %0, 7
  %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %4
  store i32 %call, i32* %arrayidx7, align 4
  %5 = add nsw i64 %0, 8
  %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %5
  store i32 %call, i32* %arrayidx8, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @multi1

; CHECK:for.body:
; CHECK:  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK:  %0 = add i64 %indvars.iv, 6
; CHECK:  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
; CHECK:  store i32 %call, i32* %arrayidx, align 4
; CHECK:  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0
; CHECK:  store i32 %call, i32* %arrayidx6, align 4
; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK:  %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK:  br i1 %exitcond1, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @multi2:
; void multi2(int *x) {
;   y = foo(0)
;   for (int i = 0; i < 500; ++i) {
;     x[3*i] = y;
;     x[3*i+1] = y;
;     x[3*i+2] = y;
;     x[3*(i+1)] = y;
;     x[3*(i+1)+1] = y;
;     x[3*(i+1)+2] = y;
;   }
; }
;
; Like @multi1, but the second group of stores is indexed from a separate
; multiply, 3*(iv+1), instead of a constant offset from 3*iv. The patterns
; below expect two stores per iteration, at %indvars.iv and %indvars.iv + 3.

; Function Attrs: nounwind uwtable
define void @multi2(i32* nocapture %x) #0 {
entry:
  ; The stored value is computed once, outside the loop.
  %call = tail call i32 @foo(i32 0) #1
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = mul nsw i64 %indvars.iv, 3
  %add = add nsw i64 %indvars.iv, 1
  %newmul = mul nsw i64 %add, 3
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
  store i32 %call, i32* %arrayidx, align 4
  %1 = add nsw i64 %0, 1
  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
  store i32 %call, i32* %arrayidx4, align 4
  %2 = add nsw i64 %0, 2
  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
  store i32 %call, i32* %arrayidx9, align 4
  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %newmul
  store i32 %call, i32* %arrayidx6, align 4
  %3 = add nsw i64 %newmul, 1
  %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %3
  store i32 %call, i32* %arrayidx7, align 4
  %4 = add nsw i64 %newmul, 2
  %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %4
  store i32 %call, i32* %arrayidx8, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @multi2

; CHECK:for.body:
; CHECK:  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK:  %0 = add i64 %indvars.iv, 3
; CHECK:  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
; CHECK:  store i32 %call, i32* %arrayidx, align 4
; CHECK:  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0
; CHECK:  store i32 %call, i32* %arrayidx6, align 4
; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK:  %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK:  br i1 %exitcond1, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @multi3:
; void multi3(int *x) {
;   y = foo(0)
;   for (int i = 0; i < 500; ++i) {
;     // Note: No zero index
;     x[3*i+3] = y;
;     x[3*i+4] = y;
;     x[3*i+5] = y;
;   }
; }
;
; The only group of stores starts at offset 3 from 3*iv; there is no store at
; 3*iv itself. The patterns below expect a single store at %indvars.iv + 3.

; Function Attrs: nounwind uwtable
define void @multi3(i32* nocapture %x) #0 {
entry:
  ; The stored value is computed once, outside the loop.
  %call = tail call i32 @foo(i32 0) #1
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = mul nsw i64 %indvars.iv, 3
  %x0 = add nsw i64 %0, 3
  ; %add has no users in this function.
  %add = add nsw i64 %indvars.iv, 1
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %x0
  store i32 %call, i32* %arrayidx, align 4
  %1 = add nsw i64 %0, 4
  %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
  store i32 %call, i32* %arrayidx4, align 4
  %2 = add nsw i64 %0, 5
  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
  store i32 %call, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @multi3
; CHECK: for.body:
; CHECK:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK:   %0 = add i64 %indvars.iv, 3
; CHECK:   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
; CHECK:   store i32 %call, i32* %arrayidx, align 4
; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK:   %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK:   br i1 %exitcond1, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; C-like pseudocode for @bar2:
; int foo(int a);
; void bar2(int *x, int y, int z) {
;   for (int i = 0; i < 500; i += 3) {
;     foo(i+y+i*z); // Slightly reordered instruction order
;     foo(i+1+y+(i+1)*z);
;     foo(i+2+y+(i+2)*z);
;   }
; }
;
; Same loop shape as @bar, but each call argument is built from an add and a
; mul. In the first group the add precedes the mul; in the second and third
; groups the mul precedes the add. The patterns below still expect a single
; group per iteration.

; Function Attrs: nounwind uwtable
define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]

  ; Group for i: add first, then mul.
  %tmp1 = add i32 %i.08, %y
  %tmp2 = mul i32 %i.08, %z
  %tmp3 = add i32 %tmp2, %tmp1
  %call = tail call i32 @foo(i32 %tmp3) #1

  ; Group for i+1: mul first, then add.
  %add = add nsw i32 %i.08, 1
  %tmp2a = mul i32 %add, %z
  %tmp1a = add i32 %add, %y
  %tmp3a = add i32 %tmp2a, %tmp1a
  %calla = tail call i32 @foo(i32 %tmp3a) #1

  ; Group for i+2: mul first, then add.
  %add2 = add nsw i32 %i.08, 2
  %tmp2b = mul i32 %add2, %z
  %tmp1b = add i32 %add2, %y
  %tmp3b = add i32 %tmp2b, %tmp1b
  %callb = tail call i32 @foo(i32 %tmp3b) #1

  %add3 = add nsw i32 %i.08, 3

  %exitcond = icmp sge i32 %add3, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @bar2

; CHECK: for.body:
; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %tmp1 = add i32 %indvar, %y
; CHECK: %tmp2 = mul i32 %indvar, %z
; CHECK: %tmp3 = add i32 %tmp2, %tmp1
; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
; CHECK: %indvar.next = add i32 %indvar, 1
; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
; CHECK: br i1 %exitcond1, label %for.end, label %for.body

; CHECK: ret

for.end:                                          ; preds = %for.body
  ret void
}

; Two-field struct type used by @gep1.
%struct.s = type { i32, i32 }

; @gep1 stores to field 0 of the %struct.s elements at indices 3*iv, 3*iv+1
; and 3*iv+2. Per the note next to the patterns below, only the absence of a
; crash is being tested, so the output is matched no further than the `ret`.

; Function Attrs: nounwind uwtable
define void @gep1(%struct.s* nocapture %x) #0 {
entry:
  %call = tail call i32 @foo(i32 0) #1
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = mul nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds %struct.s, %struct.s* %x, i64 %0, i32 0
  store i32 %call, i32* %arrayidx, align 4
  %1 = add nsw i64 %0, 1
  %arrayidx4 = getelementptr inbounds %struct.s, %struct.s* %x, i64 %1, i32 0
  store i32 %call, i32* %arrayidx4, align 4
  %2 = add nsw i64 %0, 2
  %arrayidx9 = getelementptr inbounds %struct.s, %struct.s* %x, i64 %2, i32 0
  store i32 %call, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @gep1
; This test is a crash test only.
; CHECK: ret
for.end:                                          ; preds = %for.body
  ret void
}

; @gep-indexing stores to x[3*iv], then reaches the next two elements with
; constant-index GEPs based on the first address (%arrayidx + 1 and
; %arrayidx + 2) rather than with integer adds on the index. The patterns
; below expect a single store through a GEP indexed by %indvars.iv.
define void @gep-indexing(i32* nocapture %x) {
entry:
  %call = tail call i32 @foo(i32 0) #1
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = mul nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
  store i32 %call, i32* %arrayidx, align 4
  %arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1
  store i32 %call, i32* %arrayidx4, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2
  store i32 %call, i32* %arrayidx9, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 500
  br i1 %exitcond, label %for.end, label %for.body

; CHECK-LABEL: @gep-indexing
; CHECK:      for.body:
; CHECK-NEXT:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK-NEXT:   %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv
; CHECK-NEXT:   store i32 %call, i32* %scevgep, align 4
; CHECK-NEXT:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK-NEXT:   %exitcond1 = icmp eq i64 %indvars.iv, 1499
; CHECK-NEXT:   br i1 %exitcond1, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}


; Copies elements iv and iv+1 from %buf_0 to %buf_1 per iteration using
; unordered atomic loads and stores, stepping iv by 2. The patterns below
; expect a single load/store pair and a unit-step induction variable.
define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
; CHECK-LABEL: @unordered_atomic_ops(

; CHECK: for.body:
; CHECK-NEXT:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT:   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvar
; CHECK-NEXT:   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvar
; CHECK-NEXT:   %va = load atomic i32, i32* %buf0_a unordered, align 4
; CHECK-NEXT:   store atomic i32 %va, i32* %buf1_a unordered, align 4
; CHECK-NEXT:   %indvar.next = add i32 %indvar, 1
; CHECK-NEXT:   %exitcond = icmp eq i32 %indvar, 3199
; CHECK-NEXT:   br i1 %exitcond, label %for.end, label %for.body

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add i32 %indvars.iv, 2
  %indvars.mid = add i32 %indvars.iv, 1
  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
  %va = load atomic i32, i32* %buf0_a unordered, align 4
  %vb = load atomic i32, i32* %buf0_b unordered, align 4
  store atomic i32 %va, i32* %buf1_a unordered, align 4
  store atomic i32 %vb, i32* %buf1_b unordered, align 4
  %cmp = icmp slt i32 %indvars.iv.next, 3200
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

; Same loop as @unordered_atomic_ops, except that the first store is a plain
; (non-atomic) store while the second is an unordered atomic store. The
; patterns below expect the original step-by-2 increment, the +1 offset and
; the original exit compare to remain in the output.
define void @unordered_atomic_ops_nomatch(i32* noalias %buf_0, i32* noalias %buf_1) {
; Negative test

; CHECK-LABEL: @unordered_atomic_ops_nomatch(
entry:
  br label %for.body

for.body:
; CHECK: for.body:
; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
; CHECK:   br i1 %cmp, label %for.body, label %for.end

  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add i32 %indvars.iv, 2
  %indvars.mid = add i32 %indvars.iv, 1
  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
  %va = load atomic i32, i32* %buf0_a unordered, align 4
  %vb = load atomic i32, i32* %buf0_b unordered, align 4
  store i32 %va, i32* %buf1_a, align 4  ;; Not atomic
  store atomic i32 %vb, i32* %buf1_b unordered, align 4
  %cmp = icmp slt i32 %indvars.iv.next, 3200
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

; Same loop as @unordered_atomic_ops, but the loads use acquire ordering and
; the stores use release ordering. The patterns below expect the original
; step-by-2 increment, the +1 offset and the original exit compare to remain
; in the output.
define void @ordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
; Negative test

; CHECK-LABEL: @ordered_atomic_ops(
entry:
  br label %for.body

for.body:
; CHECK: for.body:
; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
; CHECK:   br i1 %cmp, label %for.body, label %for.end

  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add i32 %indvars.iv, 2
  %indvars.mid = add i32 %indvars.iv, 1
  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
  %va = load atomic i32, i32* %buf0_a acquire, align 4
  %vb = load atomic i32, i32* %buf0_b acquire, align 4
  store atomic i32 %va, i32* %buf1_a release, align 4
  store atomic i32 %vb, i32* %buf1_b release, align 4
  %cmp = icmp slt i32 %indvars.iv.next, 3200
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

; Same loop as @unordered_atomic_ops, with a seq_cst fence between the two
; loads and the two stores. The patterns below expect both loads, the fence
; and both stores to appear consecutively in the output, in their original
; order.
define void @unordered_atomic_ops_with_fence(i32* noalias %buf_0, i32* noalias %buf_1) {
; CHECK-LABEL: @unordered_atomic_ops_with_fence(
entry:
  br label %for.body

for.body:
; CHECK: for.body:
; CHECK:  %va = load atomic i32, i32* %buf0_a unordered, align 4
; CHECK-NEXT:  %vb = load atomic i32, i32* %buf0_b unordered, align 4
; CHECK-NEXT:  fence seq_cst
; CHECK-NEXT:  store atomic i32 %va, i32* %buf1_a unordered, align 4
; CHECK-NEXT:  store atomic i32 %vb, i32* %buf1_b unordered, align 4

  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add i32 %indvars.iv, 2
  %indvars.mid = add i32 %indvars.iv, 1
  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
  %va = load atomic i32, i32* %buf0_a unordered, align 4
  %vb = load atomic i32, i32* %buf0_b unordered, align 4
  fence seq_cst
  store atomic i32 %va, i32* %buf1_a unordered, align 4
  store atomic i32 %vb, i32* %buf1_b unordered, align 4
  %cmp = icmp slt i32 %indvars.iv.next, 3200
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

; Each iteration copies two <8 x i16> vectors, at element indices %tmp and
; %tmp + 8, and advances %tmp by 16 starting from 1. The load address is
; formed as an i8 GEP on %arg1 with the index shifted left by 1; the store
; address is an i16 GEP on %arg; both are bitcast to vector pointers. The
; patterns below expect a single load/store pair driven by a new unit-step
; induction variable.
define void @pointer_bitcast_baseinst(i16* %arg, i8* %arg1, i64 %arg2) {
; CHECK-LABEL: @pointer_bitcast_baseinst(
; CHECK:       bb3:
; CHECK-NEXT:    %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
; CHECK-NEXT:    %4 = shl i64 %indvar, 3
; CHECK-NEXT:    %5 = add i64 %4, 1
; CHECK-NEXT:    %tmp5 = shl nuw i64 %5, 1
; CHECK-NEXT:    %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
; CHECK-NEXT:    %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
; CHECK-NEXT:    %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2
; CHECK-NEXT:    %tmp13 = getelementptr i16, i16* %arg, i64 %5
; CHECK-NEXT:    %tmp14 = bitcast i16* %tmp13 to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2
; CHECK-NEXT:    %indvar.next = add i64 %indvar, 1
; CHECK-NEXT:    %exitcond = icmp eq i64 %indvar, %3
; CHECK-NEXT:    br i1 %exitcond, label %bb19, label %bb3
bb:
  br label %bb3

bb3:                                              ; preds = %bb3, %bb
  %tmp = phi i64 [ 1, %bb ], [ %tmp17, %bb3 ]
  %tmp4 = add nuw i64 %tmp, 8
  %tmp5 = shl nuw i64 %tmp, 1
  %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
  %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
  %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2
  %tmp9 = shl i64 %tmp4, 1
  %tmp10 = getelementptr i8, i8* %arg1, i64 %tmp9
  %tmp11 = bitcast i8* %tmp10 to <8 x i16>*
  %tmp12 = load <8 x i16>, <8 x i16>* %tmp11, align 2
  %tmp13 = getelementptr i16, i16* %arg, i64 %tmp
  %tmp14 = bitcast i16* %tmp13 to <8 x i16>*
  store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2
  %tmp15 = getelementptr i16, i16* %arg, i64 %tmp4
  %tmp16 = bitcast i16* %tmp15 to <8 x i16>*
  store <8 x i16> %tmp12, <8 x i16>* %tmp16, align 2
  %tmp17 = add nuw nsw i64 %tmp, 16
  %tmp18 = icmp eq i64 %tmp17, %arg2
  br i1 %tmp18, label %bb19, label %bb3

bb19:                                             ; preds = %bb3
  ret void
}

; Attribute groups referenced above: #0 on function definitions, #1 on the
; calls to @foo.
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind }

