1; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine < %s | FileCheck %s
2
; Target configuration for this test module. The triple selects AArch64 Linux.
; The datalayout string reads: little-endian (e), ELF name mangling (m:e),
; alignment overrides for i32 and i128, native integer widths of 32 and 64 bits
; (n32:64), and a 128-bit natural stack alignment (S128).
; NOTE(review): "i32:64" gives i32 a 64-bit ABI alignment, while the loads in
; this file are marked "align 4"; AArch64 layouts usually spell this entry as
; "i64:64" -- confirm the string is intentional before changing it.
3target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
4target triple = "aarch64--linux-gnu"
5
6; These tests check that we remove from consideration pairs of seed
7; getelementptrs when they are known to have a constant difference. Such pairs
8; are likely not good candidates for vectorization since one can be computed
9; from the other. We use an unprofitable threshold to force vectorization.
10;
11; int getelementptr(int *g, int n, int w, int x, int y, int z) {
12;   int sum = 0;
13;   for (int i = 0; i < n ; ++i) {
14;     sum += g[2*i + w]; sum += g[2*i + x];
15;     sum += g[2*i + y]; sum += g[2*i + z];
16;   }
17;   return sum;
18; }
19;
20
21; CHECK-LABEL: @getelementptr_4x32
22;
23; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
24; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
25; CHECK: sext i32 [[X]] to i64
26;
; getelementptr_4x32: sums g[2*i + 0], g[2*i + x], g[2*i + y] and g[2*i + z]
; for i in [0, n) and returns the total (0 when n <= 0).
;
; The four array indices are all of the form (2*i + offset). The offsets are
; 0 and the arguments %x, %y, %z, so no two indices differ by a compile-time
; constant. The FileCheck directives preceding this function expect the index
; computation to be emitted as a single "add nsw <4 x i32>", with an extracted
; lane sign-extended to i64.
27define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
28entry:
  ; Skip the loop entirely unless n > 0 (signed compare).
29  %cmp31 = icmp sgt i32 %n, 0
30  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
31
32for.body.preheader:
33  br label %for.body
34
35for.cond.cleanup.loopexit:
36  br label %for.cond.cleanup
37
38for.cond.cleanup:
  ; Result is 0 when arriving from entry, otherwise the final running sum.
39  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
40  ret i32 %sum.0.lcssa
41
42for.body:
  ; %indvars.iv is the loop counter i; %sum.032 is the running sum.
43  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
44  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  ; %t4 = 2*i, the common base of all four indices.
45  %t4 = shl nsw i32 %indvars.iv, 1
  ; Index 0: 2*i + 0 (the "w" of the C sketch in the file header, fixed to 0).
46  %t5 = add nsw i32 %t4, 0
47  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
48  %t6 = load i32, i32* %arrayidx, align 4
49  %add1 = add nsw i32 %t6, %sum.032
  ; Index 1: 2*i + x.
50  %t7 = add nsw i32 %t4, %x
51  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
52  %t8 = load i32, i32* %arrayidx5, align 4
53  %add6 = add nsw i32 %add1, %t8
  ; Index 2: 2*i + y.
54  %t9 = add nsw i32 %t4, %y
55  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
56  %t10 = load i32, i32* %arrayidx10, align 4
57  %add11 = add nsw i32 %add6, %t10
  ; Index 3: 2*i + z.
58  %t11 = add nsw i32 %t4, %z
59  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
60  %t12 = load i32, i32* %arrayidx15, align 4
61  %add16 = add nsw i32 %add11, %t12
  ; ++i; leave the loop once i == n.
62  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
63  %exitcond = icmp eq i32 %indvars.iv.next , %n
64  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
65}
66
67; CHECK-LABEL: @getelementptr_2x32
68;
69; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
70; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
71; CHECK: sext i32 [[X]] to i64
72;
; getelementptr_2x32: sums g[2*i + 0], g[2*i + 1], g[2*i + y] and g[2*i + z]
; for i in [0, n) and returns the total (0 when n <= 0). The %x argument is
; declared but never used in the body.
;
; Here the first two indices (2*i + 0 and 2*i + 1) differ by the constant 1,
; which is the kind of getelementptr pair the comment at the top of the file
; says should be dropped from consideration. The FileCheck directives preceding
; this function accordingly expect only an "add nsw <2 x i32>" -- presumably
; covering the %y and %z indices; confirm against the vectorizer output.
73define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
74entry:
  ; Skip the loop entirely unless n > 0 (signed compare).
75  %cmp31 = icmp sgt i32 %n, 0
76  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
77
78for.body.preheader:
79  br label %for.body
80
81for.cond.cleanup.loopexit:
82  br label %for.cond.cleanup
83
84for.cond.cleanup:
  ; Result is 0 when arriving from entry, otherwise the final running sum.
85  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
86  ret i32 %sum.0.lcssa
87
88for.body:
  ; %indvars.iv is the loop counter i; %sum.032 is the running sum.
89  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
90  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  ; %t4 = 2*i, the common base of all four indices.
91  %t4 = shl nsw i32 %indvars.iv, 1
  ; Index 0: 2*i + 0.
92  %t5 = add nsw i32 %t4, 0
93  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
94  %t6 = load i32, i32* %arrayidx, align 4
95  %add1 = add nsw i32 %t6, %sum.032
  ; Index 1: 2*i + 1 -- a constant distance of 1 from index 0.
96  %t7 = add nsw i32 %t4, 1
97  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
98  %t8 = load i32, i32* %arrayidx5, align 4
99  %add6 = add nsw i32 %add1, %t8
  ; Index 2: 2*i + y.
100  %t9 = add nsw i32 %t4, %y
101  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
102  %t10 = load i32, i32* %arrayidx10, align 4
103  %add11 = add nsw i32 %add6, %t10
  ; Index 3: 2*i + z.
104  %t11 = add nsw i32 %t4, %z
105  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
106  %t12 = load i32, i32* %arrayidx15, align 4
107  %add16 = add nsw i32 %add11, %t12
  ; ++i; leave the loop once i == n.
108  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
109  %exitcond = icmp eq i32 %indvars.iv.next , %n
110  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
111}
112