; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; SLP vectorizer tests on groups of adjacent double stores. The cases cover
; scalar values shared between lanes and gather (insertelement) sequences
; that either can or cannot be shared across basic blocks.
;
; The data layout declares 32-bit pointers (p:32:32:32); the triple matches
; the -mtriple passed on the command line above.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.8.0"

;int test(double *G) {
;  G[0] = 1+G[5]*4;
;  G[1] = 6+G[6]*3;
;  G[2] = 7+G[5]*4;
;  G[3] = 8+G[6]*4;
;}
;
; Four stores to consecutive elements G[0..3]. The product G[5]*4 (%mul) is
; used by both the G[0] and G[2] lanes, and the loaded G[6] (%1) is used by
; both the G[1] and G[3] lanes. The expected output has a <2 x double> load
; followed by a single <4 x double> fadd and a single <4 x double> store.

;CHECK-LABEL: @test(
;CHECK: load <2 x double>
;CHECK: fadd <4 x double>
;CHECK: store <4 x double>
;CHECK: ret i32

define i32 @test(double* nocapture %G) {
entry:
  ; G[0] = G[5]*4 + 1
  %arrayidx = getelementptr inbounds double, double* %G, i64 5
  %0 = load double, double* %arrayidx, align 8
  %mul = fmul double %0, 4.000000e+00
  %add = fadd double %mul, 1.000000e+00
  store double %add, double* %G, align 8
  ; G[1] = G[6]*3 + 6
  %arrayidx2 = getelementptr inbounds double, double* %G, i64 6
  %1 = load double, double* %arrayidx2, align 8
  %mul3 = fmul double %1, 3.000000e+00
  %add4 = fadd double %mul3, 6.000000e+00
  %arrayidx5 = getelementptr inbounds double, double* %G, i64 1
  store double %add4, double* %arrayidx5, align 8
  ; G[2] = G[5]*4 + 7, reusing %mul from the first lane
  %add8 = fadd double %mul, 7.000000e+00
  %arrayidx9 = getelementptr inbounds double, double* %G, i64 2
  store double %add8, double* %arrayidx9, align 8
  ; G[3] = G[6]*4 + 8, reusing the loaded value %1 with a different factor
  %mul11 = fmul double %1, 4.000000e+00
  %add12 = fadd double %mul11, 8.000000e+00
  %arrayidx13 = getelementptr inbounds double, double* %G, i64 3
  store double %add12, double* %arrayidx13, align 8
  ; The C sketch has no return statement, hence the undef result.
  ret i32 undef
}

;int foo(double *A, int n) {
;  A[0] = A[0] * 7.9 * n + 6.0;
;  A[1] = A[1] * 7.7 * n + 2.0;
;  A[2] = A[2] * 7.6 * n + 3.0;
;  A[3] = A[3] * 7.4 * n + 4.0;
;}
;
; Each of the four lanes loads A[i], multiplies it by a lane-specific
; constant, multiplies by the shared (double)n value %conv, adds a
; lane-specific constant and stores back to A[i]. The expected output is
; fully four-wide: one load, two fmuls, one fadd and one store.
; CHECK-LABEL: @foo(
; CHECK: load <4 x double>
; CHECK: fmul <4 x double>
; CHECK: fmul <4 x double>
; CHECK: fadd <4 x double>
; CHECK: store <4 x double>
define i32 @foo(double* nocapture %A, i32 %n) {
entry:
  ; Lane 0: A[0] = n * (A[0] * 7.9) + 6.0
  %0 = load double, double* %A, align 8
  %mul = fmul double %0, 7.900000e+00
  ; %conv is computed once here and used by all four lanes.
  %conv = sitofp i32 %n to double
  %mul1 = fmul double %conv, %mul
  %add = fadd double %mul1, 6.000000e+00
  store double %add, double* %A, align 8
  ; Lane 1: A[1] = n * (A[1] * 7.7) + 2.0
  %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
  %1 = load double, double* %arrayidx3, align 8
  %mul4 = fmul double %1, 7.700000e+00
  %mul6 = fmul double %conv, %mul4
  %add7 = fadd double %mul6, 2.000000e+00
  store double %add7, double* %arrayidx3, align 8
  ; Lane 2: A[2] = n * (A[2] * 7.6) + 3.0
  %arrayidx9 = getelementptr inbounds double, double* %A, i64 2
  %2 = load double, double* %arrayidx9, align 8
  %mul10 = fmul double %2, 7.600000e+00
  %mul12 = fmul double %conv, %mul10
  %add13 = fadd double %mul12, 3.000000e+00
  store double %add13, double* %arrayidx9, align 8
  ; Lane 3: A[3] = n * (A[3] * 7.4) + 4.0
  %arrayidx15 = getelementptr inbounds double, double* %A, i64 3
  %3 = load double, double* %arrayidx15, align 8
  %mul16 = fmul double %3, 7.400000e+00
  %mul18 = fmul double %conv, %mul16
  %add19 = fadd double %mul18, 4.000000e+00
  store double %add19, double* %arrayidx15, align 8
  ; The C sketch has no return statement, hence the undef result.
  ret i32 undef
}

; int test2(double *G, int k) {
;   if (k) {
;     G[0] = 1+G[5]*4;
;     G[1] = 6+G[6]*3;
;   } else {
;     G[2] = 7+G[5]*4;
;     G[3] = 8+G[6]*3;
;   }
; }
;
; G[5]*4 (%4) is computed once before the branch and used on both sides;
; G[6] is loaded separately inside each branch. The two branch blocks are
; siblings, so neither one dominates the other.

; We can't merge the gather sequences because one does not dominate the other.
; CHECK-LABEL: @test2(
; CHECK: insertelement
; CHECK: insertelement
; CHECK: insertelement
; CHECK: insertelement
; CHECK: ret
define i32 @test2(double* nocapture %G, i32 %k) {
  ; %1 is true when k == 0, which selects the "else" side (block %12).
  %1 = icmp eq i32 %k, 0
  %2 = getelementptr inbounds double, double* %G, i64 5
  %3 = load double, double* %2, align 8
  %4 = fmul double %3, 4.000000e+00
  br i1 %1, label %12, label %5

; <label>:5                                       ; preds = %0
  ; k != 0: G[0] = G[5]*4 + 1 and G[1] = G[6]*3 + 6
  %6 = fadd double %4, 1.000000e+00
  store double %6, double* %G, align 8
  %7 = getelementptr inbounds double, double* %G, i64 6
  %8 = load double, double* %7, align 8
  %9 = fmul double %8, 3.000000e+00
  %10 = fadd double %9, 6.000000e+00
  %11 = getelementptr inbounds double, double* %G, i64 1
  store double %10, double* %11, align 8
  br label %20

; <label>:12                                      ; preds = %0
  ; k == 0: G[2] = G[5]*4 + 7 and G[3] = G[6]*3 + 8
  %13 = fadd double %4, 7.000000e+00
  %14 = getelementptr inbounds double, double* %G, i64 2
  store double %13, double* %14, align 8
  %15 = getelementptr inbounds double, double* %G, i64 6
  %16 = load double, double* %15, align 8
  %17 = fmul double %16, 3.000000e+00
  %18 = fadd double %17, 8.000000e+00
  %19 = getelementptr inbounds double, double* %G, i64 3
  store double %18, double* %19, align 8
  br label %20

; <label>:20                                      ; preds = %12, %5
  ; The C sketch has no return statement, hence the undef result.
  ret i32 undef
}


;int foo4(double *A, int n) {
;  A[0] = A[0] * 7.9 * n + 6.0;
;  A[1] = A[1] * 7.9 * n + 6.0;
;  A[2] = A[2] * 7.9 * n + 6.0;
;  A[3] = A[3] * 7.9 * n + 6.0;
;}
;
; Same shape as @foo, except that every lane uses the same multiplier (7.9)
; and the same addend (6.0). The expected output is again fully four-wide.
; CHECK-LABEL: @foo4(
; CHECK: load <4 x double>
; CHECK: fmul <4 x double>
; CHECK: fmul <4 x double>
; CHECK: fadd <4 x double>
; CHECK: store <4 x double>
define i32 @foo4(double* nocapture %A, i32 %n) {
entry:
  ; Lane 0: A[0] = n * (A[0] * 7.9) + 6.0
  %0 = load double, double* %A, align 8
  %mul = fmul double %0, 7.900000e+00
  ; %conv is computed once here and used by all four lanes.
  %conv = sitofp i32 %n to double
  %mul1 = fmul double %conv, %mul
  %add = fadd double %mul1, 6.000000e+00
  store double %add, double* %A, align 8
  ; Lane 1: A[1] = n * (A[1] * 7.9) + 6.0
  %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
  %1 = load double, double* %arrayidx3, align 8
  %mul4 = fmul double %1, 7.900000e+00
  %mul6 = fmul double %conv, %mul4
  %add7 = fadd double %mul6, 6.000000e+00
  store double %add7, double* %arrayidx3, align 8
  ; Lane 2: A[2] = n * (A[2] * 7.9) + 6.0
  %arrayidx9 = getelementptr inbounds double, double* %A, i64 2
  %2 = load double, double* %arrayidx9, align 8
  %mul10 = fmul double %2, 7.900000e+00
  %mul12 = fmul double %conv, %mul10
  %add13 = fadd double %mul12, 6.000000e+00
  store double %add13, double* %arrayidx9, align 8
  ; Lane 3: A[3] = n * (A[3] * 7.9) + 6.0
  %arrayidx15 = getelementptr inbounds double, double* %A, i64 3
  %3 = load double, double* %arrayidx15, align 8
  %mul16 = fmul double %3, 7.900000e+00
  %mul18 = fmul double %conv, %mul16
  %add19 = fadd double %mul18, 6.000000e+00
  store double %add19, double* %arrayidx15, align 8
  ; The C sketch has no return statement, hence the undef result.
  ret i32 undef
}

;int partial_mrg(double *A, int n) {
;  A[0] = A[0] * n;
;  A[1] = A[1] * n;
;  if (n < 4) return 0;
;  A[2] = A[2] * n;
;  A[3] = A[3] * (n+4);
;}
;
; The entry block scales A[0] and A[1] by (double)n (%conv). The if.end
; block scales A[2] by the same %conv but A[3] by (double)(n+4) (%conv12).
; The directives below require exactly three <2 x double> insertelement
; instructions before the ret: three matches, then none.
; NOTE(review): presumably this reflects partial reuse of the entry block's
; gather in if.end; confirm against the vectorizer output.
;CHECK-LABEL: @partial_mrg(
;CHECK: insertelement <2 x double>
;CHECK: insertelement <2 x double>
;CHECK: insertelement <2 x double>
;CHECK-NOT: insertelement <2 x double>
;CHECK: ret
define i32 @partial_mrg(double* nocapture %A, i32 %n) {
entry:
  ; A[0] *= n
  %0 = load double, double* %A, align 8
  %conv = sitofp i32 %n to double
  %mul = fmul double %conv, %0
  store double %mul, double* %A, align 8
  ; A[1] *= n
  %arrayidx2 = getelementptr inbounds double, double* %A, i64 1
  %1 = load double, double* %arrayidx2, align 8
  %mul4 = fmul double %conv, %1
  store double %mul4, double* %arrayidx2, align 8
  ; Early exit when n < 4.
  %cmp = icmp slt i32 %n, 4
  br i1 %cmp, label %return, label %if.end

if.end:                                           ; preds = %entry
  ; A[2] *= n, reusing %conv from the entry block
  %arrayidx7 = getelementptr inbounds double, double* %A, i64 2
  %2 = load double, double* %arrayidx7, align 8
  %mul9 = fmul double %conv, %2
  store double %mul9, double* %arrayidx7, align 8
  ; A[3] *= (n + 4), which needs its own int-to-double conversion
  %arrayidx11 = getelementptr inbounds double, double* %A, i64 3
  %3 = load double, double* %arrayidx11, align 8
  %add = add nsw i32 %n, 4
  %conv12 = sitofp i32 %add to double
  %mul13 = fmul double %conv12, %3
  store double %mul13, double* %arrayidx11, align 8
  br label %return

return:                                           ; preds = %entry, %if.end
  ret i32 0
}

; Named struct types and the external global referenced by @PR19646.
; %class.B.53.55 embeds a %class.A.52.54 (three doubles) followed by one
; more double.
%class.B.53.55 = type { %class.A.52.54, double }
%class.A.52.54 = type { double, double, double }

@a = external global double, align 8

; Test case named after PR19646. No output patterns are attached to this
; function, so presumably it only has to pass through the vectorizer
; without crashing (TODO: confirm against the bug report).
;
; Both edges of the entry branch go to if.end13. sw.epilog7 has no
; predecessors, and if.then12 is reached only from sw.epilog7, so neither
; block is reachable from the entry block.
define void @PR19646(%class.B.53.55* %this) {
entry:
  br i1 undef, label %if.end13, label %if.end13

sw.epilog7:                                       ; No predecessors!
  ; Two fadd chains: one fed by field 1 of the inner struct, the other by
  ; @a and field 2 of the inner struct.
  %.in = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 1
  %0 = load double, double* %.in, align 8
  %add = fadd double undef, 0.000000e+00
  %add6 = fadd double %add, %0
  %1 = load double, double* @a, align 8
  %add8 = fadd double %1, 0.000000e+00
  %_dy = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 2
  %2 = load double, double* %_dy, align 8
  %add10 = fadd double %add8, %2
  br i1 undef, label %if.then12, label %if.end13

if.then12:                                        ; preds = %sw.epilog7
  %3 = load double, double* undef, align 8
  br label %if.end13

if.end13:                                         ; preds = %if.then12, %sw.epilog7, %entry
  ; %entry is listed twice in each phi because the entry branch contributes
  ; two edges into this block.
  %x.1 = phi double [ 0.000000e+00, %if.then12 ], [ %add6, %sw.epilog7 ], [ undef, %entry ], [ undef, %entry ]
  %b.0 = phi double [ %3, %if.then12 ], [ %add10, %sw.epilog7 ], [ undef, %entry ], [ undef, %entry ]
  unreachable
}
