1; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
; The opt invocation above forces a vector width of 4 and an interleave count
; of 1, which is why the expected vector bodies below use <4 x i32> values and
; step the vector induction variable by 4.
2
3
; External i32 arrays (257 elements each) shared by the tests below. The loops
; that reference them load from @p and store to @q.
4@p = external local_unnamed_addr global [257 x i32], align 16
5@q = external local_unnamed_addr global [257 x i32], align 16
6
7; Test case for PR43398.
; %pre.phi is a first-order recurrence: on entry it is %.pre, afterwards it is
; the value loaded by %pre.next in the previous iteration. Its user %add.1
; appears in the loop body before the %pre.next load. The directives below
; expect a vectorized loop in which the recurrence shuffle and the add of the
; %x splat are emitted after %wide.load.
; %ptr and %tc are not referenced in the function body.
8
9define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
10; CHECK-LABEL: vector.ph:
11; CHECK:        %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
12; CHECK-NEXT:   %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
13; CHECK-NEXT:   %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
14; CHECK-NEXT:    br label %vector.body
15
16; CHECK-LABEL: vector.body:
17; CHECK-NEXT:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
18; CHECK-NEXT:   %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
19; CHECK-NEXT:   %offset.idx = add i64 1, %index
20; CHECK-NEXT:   %0 = add i64 %offset.idx, 0
21; CHECK-NEXT:   %1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %0
22; CHECK-NEXT:   %2 = getelementptr inbounds i32, i32* %1, i32 0
23; CHECK-NEXT:   %3 = bitcast i32* %2 to <4 x i32>*
24; CHECK-NEXT:   %wide.load = load <4 x i32>, <4 x i32>* %3, align 4
25; CHECK-NEXT:   %4 = shufflevector <4 x i32> %vector.recur, <4 x i32> %wide.load, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
26; CHECK-NEXT:   %5 = add <4 x i32> %4, %broadcast.splat
27; CHECK-NEXT:   %6 = add <4 x i32> %5, %wide.load
28; CHECK-NEXT:   %7 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %0
29; CHECK-NEXT:   %8 = getelementptr inbounds i32, i32* %7, i32 0
30; CHECK-NEXT:   %9 = bitcast i32* %8 to <4 x i32>*
31; CHECK-NEXT:   store <4 x i32> %6, <4 x i32>* %9, align 4
32; CHECK-NEXT:   %index.next = add i64 %index, 4
33; CHECK-NEXT:   %10 = icmp eq i64 %index.next, 1996
34; CHECK-NEXT:   br i1 %10, label %middle.block, label %vector.body
35;
36entry:
37  br label %preheader
38
39preheader:
  ; Initial value of the recurrence: element 1 of @p.
40  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
41  %.pre = load i32, i32* %idx.phi.trans, align 4
42  br label %for
43
44for:
  ; NOTE(review): %iv runs from 1 up to 1999 although @p and @q are declared
  ; with 257 elements - presumably irrelevant for this compile-only test;
  ; confirm.
45  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
46  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; User of the recurrence phi, placed before the load that feeds the phi.
47  %add.1 = add i32 %pre.phi, %x
48  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  ; Backedge value of %pre.phi.
49  %pre.next = load i32, i32* %idx.1, align 4
50  %add.2 = add i32 %add.1, %pre.next
51  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
52  store i32 %add.2, i32* %idx.2, align 4
53  %iv.next = add nuw nsw i64 %iv, 1
54  %exitcond = icmp eq i64 %iv.next, 2000
55  br i1 %exitcond, label %exit, label %for
56
57exit:
58  ret void
59}
60
61; We can sink potentially trapping instructions, as this will only delay the
62; trap and not introduce traps on additional paths.
; Here the user of the recurrence phi %pre.phi is an sdiv by %x that appears
; before the %pre.next load. The directives below expect a vectorized loop in
; which the widened sdiv is emitted after %wide.load.
; %ptr and %tc are not referenced in the function body.
63define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
64; CHECK-LABEL: vector.ph:
65; CHECK:        %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
66; CHECK-NEXT:   %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
67; CHECK-NEXT:   %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
68; CHECK-NEXT:    br label %vector.body
69
70; CHECK-LABEL: vector.body:
71; CHECK-NEXT:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
72; CHECK-NEXT:   %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
73; CHECK-NEXT:   %offset.idx = add i64 1, %index
74; CHECK-NEXT:   %0 = add i64 %offset.idx, 0
75; CHECK-NEXT:   %1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %0
76; CHECK-NEXT:   %2 = getelementptr inbounds i32, i32* %1, i32 0
77; CHECK-NEXT:   %3 = bitcast i32* %2 to <4 x i32>*
78; CHECK-NEXT:   %wide.load = load <4 x i32>, <4 x i32>* %3, align 4
79; CHECK-NEXT:   %4 = shufflevector <4 x i32> %vector.recur, <4 x i32> %wide.load, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
80; CHECK-NEXT:   %5 = sdiv <4 x i32> %4, %broadcast.splat
81; CHECK-NEXT:   %6 = add <4 x i32> %5, %wide.load
82; CHECK-NEXT:   %7 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %0
83; CHECK-NEXT:   %8 = getelementptr inbounds i32, i32* %7, i32 0
84; CHECK-NEXT:   %9 = bitcast i32* %8 to <4 x i32>*
85; CHECK-NEXT:   store <4 x i32> %6, <4 x i32>* %9, align 4
86; CHECK-NEXT:   %index.next = add i64 %index, 4
87; CHECK-NEXT:   %10 = icmp eq i64 %index.next, 1996
88; CHECK-NEXT:   br i1 %10, label %middle.block, label %vector.body
89;
90entry:
91  br label %preheader
92
93preheader:
  ; Initial value of the recurrence: element 1 of @p.
94  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
95  %.pre = load i32, i32* %idx.phi.trans, align 4
96  br label %for
97
98for:
99  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
100  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; Division that uses the recurrence phi, placed before the load that feeds
  ; the phi.
101  %div.1 = sdiv i32 %pre.phi, %x
102  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  ; Backedge value of %pre.phi.
103  %pre.next = load i32, i32* %idx.1, align 4
104  %add.2 = add i32 %div.1, %pre.next
105  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
106  store i32 %add.2, i32* %idx.2, align 4
107  %iv.next = add nuw nsw i64 %iv, 1
108  %exitcond = icmp eq i64 %iv.next, 2000
109  br i1 %exitcond, label %exit, label %for
110
111exit:
112  ret void
113}
114
115; FIXME: Currently we can only sink a single instruction. For the example below,
116;        we also have to sink users.
; %add.1 (the user of the recurrence phi %pre.phi) has two users of its own:
; %add.2, which appears before the %pre.next load, and %add.3, which appears
; after it. The directives below expect the original scalar blocks (entry,
; preheader, for, exit) to remain in place.
; %ptr and %tc are not referenced in the function body.
117define void @cannot_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
118; CHECK-LABEL: define void @cannot_sink_with_additional_user(
119; CHECK-NEXT: entry:
120; CHECK-NEXT:   br label %preheader
121
122; CHECK-LABEL: preheader:                                        ; preds = %entry
123; CHECK:  br label %for
124
125; CHECK-LABEL: for:                                              ; preds = %for, %preheader
126; CHECK:  br i1 %exitcond, label %exit, label %for
127
128; CHECK-LABEL: exit:
129; CHECK-NEXT:    ret void
130
131entry:
132  br label %preheader
133
134preheader:
  ; Initial value of the recurrence: element 1 of @p.
135  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
136  %.pre = load i32, i32* %idx.phi.trans, align 4
137  br label %for
138
139for:
140  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
141  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; User of the recurrence phi.
142  %add.1 = add i32 %pre.phi, %x
  ; Additional user of %add.1 that also precedes the %pre.next load.
143  %add.2 = add i32 %add.1, %x
144  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  ; Backedge value of %pre.phi.
145  %pre.next = load i32, i32* %idx.1, align 4
146  %add.3 = add i32 %add.1, %pre.next
147  %add.4 = add i32 %add.2, %add.3
148  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
149  store i32 %add.4, i32* %idx.2, align 4
150  %iv.next = add nuw nsw i64 %iv, 1
151  %exitcond = icmp eq i64 %iv.next, 2000
152  br i1 %exitcond, label %exit, label %for
153
154exit:
155  ret void
156}
157
158; FIXME: We can sink a store, if we can guarantee that it does not alias any
159;        loads/stores in between.
; %add.1 (the user of the recurrence phi %pre.phi) is stored to %ptr before
; the %pre.next load, so the store is one of the users that would have to
; move. The directives below expect the original scalar blocks (entry,
; preheader, for, exit) to remain in place.
; %tc is not referenced in the function body.
160define void @cannot_sink_store(i32 %x, i32* %ptr, i64 %tc) {
161; CHECK-LABEL: define void @cannot_sink_store(
162; CHECK-NEXT: entry:
163; CHECK-NEXT:   br label %preheader
164
165; CHECK-LABEL: preheader:                                        ; preds = %entry
166; CHECK:  br label %for
167
168; CHECK-LABEL: for:                                              ; preds = %for, %preheader
169; CHECK:  br i1 %exitcond, label %exit, label %for
170
171; CHECK-LABEL: exit:
172; CHECK-NEXT:    ret void
173;
174entry:
175  br label %preheader
176
177preheader:
  ; Initial value of the recurrence: element 1 of @p.
178  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
179  %.pre = load i32, i32* %idx.phi.trans, align 4
180  br label %for
181
182for:
183  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
184  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; User of the recurrence phi.
185  %add.1 = add i32 %pre.phi, %x
  ; Store of %add.1 through the %ptr argument, ahead of the %pre.next load.
186  store i32 %add.1, i32* %ptr
187  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  ; Backedge value of %pre.phi.
188  %pre.next = load i32, i32* %idx.1, align 4
189  %add.2 = add i32 %add.1, %pre.next
190  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
191  store i32 %add.2, i32* %idx.2, align 4
192  %iv.next = add nuw nsw i64 %iv, 1
193  %exitcond = icmp eq i64 %iv.next, 2000
194  br i1 %exitcond, label %exit, label %for
195
196exit:
197  ret void
198}
199
200; Some kinds of reductions are not detected by IVDescriptors. If we have a
201; cycle, we cannot sink it.
; Here the backedge value of %pre.phi is %d, and %d is itself computed from
; %pre.phi (sdiv by %x), so the phi and its user form a cycle. The directives
; below expect the original scalar blocks (entry, preheader, for, exit) to
; remain in place, with exit consisting of just the return.
; %ptr and %tc are not referenced in the function body.
202define void @cannot_sink_reduction(i32 %x, i32* %ptr, i64 %tc) {
203; CHECK-LABEL: define void @cannot_sink_reduction(
204; CHECK-NEXT: entry:
205; CHECK-NEXT:   br label %preheader
206
207; CHECK-LABEL: preheader:                                        ; preds = %entry
208; CHECK:  br label %for
209
210; CHECK-LABEL: for:                                              ; preds = %for, %preheader
211; CHECK:  br i1 %exitcond, label %exit, label %for
212
213; CHECK-LABEL: exit:                                    ; preds = %for
214; CHECK-NEXT:    ret void
215;
216entry:
217  br label %preheader
218
219preheader:
  ; Initial value of %pre.phi: element 1 of @p.
220  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
221  %.pre = load i32, i32* %idx.phi.trans, align 4
222  br label %for
223
224for:
  ; %pre.phi and %d feed each other across the backedge.
225  %pre.phi = phi i32 [ %.pre, %preheader ], [ %d, %for ]
226  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
227  %d = sdiv i32 %pre.phi, %x
228  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  ; In this test %pre.next does not feed the phi; it is only added to %x and
  ; stored to @q.
229  %pre.next = load i32, i32* %idx.1, align 4
230  %add.2 = add i32 %x, %pre.next
231  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
232  store i32 %add.2, i32* %idx.2, align 4
233  %iv.next = add nuw nsw i64 %iv, 1
234  %exitcond = icmp eq i64 %iv.next, 2000
235  br i1 %exitcond, label %exit, label %for
236
237exit:
238  ret void
239}
240
241; TODO: We should be able to sink %tmp38 after %tmp60.
; %tmp38 multiplies two phis, %tmp37 and %tmp27, whose backedge values are the
; loads %tmp60 and %tmp49 that appear later in the same block. The directives
; below expect the original blocks (bb, bb13, bb74) to remain in place.
242define void @instruction_with_2_FOR_operands() {
243; CHECK-LABEL: define void @instruction_with_2_FOR_operands(
244; CHECK-NEXT: bb:
245; CHECK-NEXT:   br label %bb13
246
247; CHECK-LABEL: bb13:
248; CHECK:         br i1 %tmp12, label %bb13, label %bb74
249
250; CHECK-LABEL: bb74:
251; CHECK-NEXT:    ret void
252;
253bb:
254  br label %bb13
255
256bb13:                                             ; preds = %bb13, %bb
  ; Both phis start as undef and take a loaded value on the backedge.
257  %tmp37 = phi float [ %tmp60, %bb13 ], [ undef, %bb ]
258  %tmp27 = phi float [ %tmp49, %bb13 ], [ undef, %bb ]
259  %indvars.iv = phi i64 [ %indvars.iv.next, %bb13 ], [ 0, %bb ]
  ; Single instruction that uses both phis.
260  %tmp38 = fmul fast float %tmp37, %tmp27
261  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
262  %tmp49 = load float, float* undef, align 4
263  %tmp60 = load float, float* undef, align 4
264  %tmp12 = icmp slt i64 %indvars.iv, undef
265  br i1 %tmp12, label %bb13, label %bb74
266
267bb74:                                             ; preds = %bb13
268  ret void
269}
270
271; Users that are phi nodes cannot be sunk.
; %for is a phi whose backedge value is %for.next. Its only user is another
; phi, %first_time.1 in loop.latch, which selects between the constant 20 and
; %for depending on which branch of the diamond was taken. The directives
; below expect no vector.body block in the output for this function.
272define void @cannot_sink_phi(i32* %ptr) {
273; CHECK-LABEL: define void @cannot_sink_phi(
274; CHECK-NOT:   vector.body
275entry:
276  br label %loop.header
277
278loop.header:                                      ; preds = %loop.latch, %entry
279  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
280  %for = phi i32 [ 0, %entry ], [ %for.next, %loop.latch ]
281  %c.1 = icmp ult i64 %iv, 500
282  br i1 %c.1, label %if.truebb, label %if.falsebb
283
284if.truebb:                                        ; preds = %loop.header
285  br label %loop.latch
286
287if.falsebb:                                       ; preds = %loop.header
288  br label %loop.latch
289
290loop.latch:                                       ; preds = %if.falsebb, %if.truebb
  ; Phi user of the recurrence phi %for.
291  %first_time.1 = phi i32 [ 20, %if.truebb ], [ %for, %if.falsebb ]
292  %c.2 = icmp ult i64 %iv, 800
  ; Backedge value of %for; it is also stored to %ptr[%iv].
293  %for.next = select i1 %c.2, i32 30, i32 %first_time.1
294  %ptr.idx = getelementptr i32, i32* %ptr, i64 %iv
295  store i32 %for.next, i32* %ptr.idx
296  %iv.next = add nuw nsw i64 %iv, 1
297  %exitcond.not = icmp eq i64 %iv.next, 1000
298  br i1 %exitcond.not, label %exit, label %loop.header
299
300exit:
301  ret void
302}
303