; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s


@p = external local_unnamed_addr global [257 x i32], align 16
@q = external local_unnamed_addr global [257 x i32], align 16

; Test case for PR43398.
; In the scalar loop, %add.1 uses the recurrence phi %pre.phi and is defined
; before %pre.next, the load that feeds the phi. The expected vector body below
; has the corresponding vector add placed after the wide load and after the
; shuffle that combines the previous and current vector values.

define void @can_sink_after_store(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
; CHECK-LABEL: vector.ph:
; CHECK:        %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
; CHECK-NEXT:   %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:   %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK-NEXT:    br label %vector.body

; CHECK-LABEL: vector.body:
; CHECK-NEXT:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK-NEXT:   %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
; CHECK-NEXT:   %offset.idx = add i64 1, %index
; CHECK-NEXT:   %0 = add i64 %offset.idx, 0
; CHECK-NEXT:   %1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %0
; CHECK-NEXT:   %2 = getelementptr inbounds i32, i32* %1, i32 0
; CHECK-NEXT:   %3 = bitcast i32* %2 to <4 x i32>*
; CHECK-NEXT:   %wide.load = load <4 x i32>, <4 x i32>* %3, align 4
; CHECK-NEXT:   %4 = shufflevector <4 x i32> %vector.recur, <4 x i32> %wide.load, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:   %5 = add <4 x i32> %4, %broadcast.splat
; CHECK-NEXT:   %6 = add <4 x i32> %5, %wide.load
; CHECK-NEXT:   %7 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %0
; CHECK-NEXT:   %8 = getelementptr inbounds i32, i32* %7, i32 0
; CHECK-NEXT:   %9 = bitcast i32* %8 to <4 x i32>*
; CHECK-NEXT:   store <4 x i32> %6, <4 x i32>* %9, align 4
; CHECK-NEXT:   %index.next = add i64 %index, 4
; CHECK-NEXT:   %10 = icmp eq i64 %index.next, 1996
; CHECK-NEXT:   br i1 %10, label %middle.block, label %vector.body
;
entry:
  br label %preheader

preheader:
  ; Initial value of the recurrence: the element of @p at index 1.
  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
  %.pre = load i32, i32* %idx.phi.trans, align 4
  br label %for

for:
  ; %pre.phi carries the value loaded from @p by the previous iteration.
  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; User of the recurrence phi, defined before the load that produces the
  ; phi's next value.
  %add.1 = add i32 %pre.phi, %x
  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  %pre.next = load i32, i32* %idx.1, align 4
  %add.2 = add i32 %add.1, %pre.next
  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
  store i32 %add.2, i32* %idx.2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 2000
  br i1 %exitcond, label %exit, label %for

exit:
  ret void
}

; We can sink potentially trapping instructions, as this will only delay the
; trap and not introduce traps on additional paths.
; Here %div.1 (an sdiv of the recurrence phi by %x) is defined before the load
; %pre.next; the expected vector body below has the vector sdiv after the wide
; load.
define void @sink_sdiv(i32 %x, i32* %ptr, i64 %tc) local_unnamed_addr #0 {
; CHECK-LABEL: vector.ph:
; CHECK:        %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
; CHECK-NEXT:   %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:   %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
; CHECK-NEXT:    br label %vector.body

; CHECK-LABEL: vector.body:
; CHECK-NEXT:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK-NEXT:   %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ %wide.load, %vector.body ]
; CHECK-NEXT:   %offset.idx = add i64 1, %index
; CHECK-NEXT:   %0 = add i64 %offset.idx, 0
; CHECK-NEXT:   %1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %0
; CHECK-NEXT:   %2 = getelementptr inbounds i32, i32* %1, i32 0
; CHECK-NEXT:   %3 = bitcast i32* %2 to <4 x i32>*
; CHECK-NEXT:   %wide.load = load <4 x i32>, <4 x i32>* %3, align 4
; CHECK-NEXT:   %4 = shufflevector <4 x i32> %vector.recur, <4 x i32> %wide.load, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:   %5 = sdiv <4 x i32> %4, %broadcast.splat
; CHECK-NEXT:   %6 = add <4 x i32> %5, %wide.load
; CHECK-NEXT:   %7 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %0
; CHECK-NEXT:   %8 = getelementptr inbounds i32, i32* %7, i32 0
; CHECK-NEXT:   %9 = bitcast i32* %8 to <4 x i32>*
; CHECK-NEXT:   store <4 x i32> %6, <4 x i32>* %9, align 4
; CHECK-NEXT:   %index.next = add i64 %index, 4
; CHECK-NEXT:   %10 = icmp eq i64 %index.next, 1996
; CHECK-NEXT:   br i1 %10, label %middle.block, label %vector.body
;
entry:
  br label %preheader

preheader:
  ; Initial value of the recurrence: the element of @p at index 1.
  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
  %.pre = load i32, i32* %idx.phi.trans, align 4
  br label %for

for:
  ; %pre.phi carries the value loaded from @p by the previous iteration.
  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; Division that uses the recurrence phi, defined before the load that
  ; produces the phi's next value.
  %div.1 = sdiv i32 %pre.phi, %x
  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  %pre.next = load i32, i32* %idx.1, align 4
  %add.2 = add i32 %div.1, %pre.next
  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
  store i32 %add.2, i32* %idx.2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 2000
  br i1 %exitcond, label %exit, label %for

exit:
  ret void
}

; FIXME: Currently we can only sink a single instruction. For the example below,
; we also have to sink users.
define void @cannot_sink_with_additional_user(i32 %x, i32* %ptr, i64 %tc) {
; CHECK-LABEL: define void @cannot_sink_with_additional_user(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %preheader

; CHECK-LABEL: preheader:                                        ; preds = %entry
; CHECK:         br label %for

; CHECK-LABEL: for:                                              ; preds = %for, %preheader
; CHECK:         br i1 %exitcond, label %exit, label %for

; CHECK-LABEL: exit:
; CHECK-NEXT:    ret void

; The expected output above is the original scalar control flow: the loop
; still branches on %exitcond back to %for.
entry:
  br label %preheader

preheader:
  ; Initial value of the recurrence: the element of @p at index 1.
  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
  %.pre = load i32, i32* %idx.phi.trans, align 4
  br label %for

for:
  ; %pre.phi carries the value loaded from @p by the previous iteration.
  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; %add.1 uses the recurrence phi and itself has two users: %add.2 (before
  ; the load %pre.next) and %add.3 (after it).
  %add.1 = add i32 %pre.phi, %x
  %add.2 = add i32 %add.1, %x
  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  %pre.next = load i32, i32* %idx.1, align 4
  %add.3 = add i32 %add.1, %pre.next
  %add.4 = add i32 %add.2, %add.3
  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
  store i32 %add.4, i32* %idx.2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 2000
  br i1 %exitcond, label %exit, label %for

exit:
  ret void
}

; FIXME: We can sink a store, if we can guarantee that it does not alias any
; loads/stores in between.
define void @cannot_sink_store(i32 %x, i32* %ptr, i64 %tc) {
; CHECK-LABEL: define void @cannot_sink_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %preheader

; CHECK-LABEL: preheader:                                        ; preds = %entry
; CHECK:         br label %for

; CHECK-LABEL: for:                                              ; preds = %for, %preheader
; CHECK:         br i1 %exitcond, label %exit, label %for

; CHECK-LABEL: exit:
; CHECK-NEXT:    ret void
;
; The expected output above is the original scalar control flow: the loop
; still branches on %exitcond back to %for.
entry:
  br label %preheader

preheader:
  ; Initial value of the recurrence: the element of @p at index 1.
  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
  %.pre = load i32, i32* %idx.phi.trans, align 4
  br label %for

for:
  ; %pre.phi carries the value loaded from @p by the previous iteration.
  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  ; %add.1 uses the recurrence phi and is stored through %ptr before the load
  ; %pre.next that produces the phi's next value.
  %add.1 = add i32 %pre.phi, %x
  store i32 %add.1, i32* %ptr
  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  %pre.next = load i32, i32* %idx.1, align 4
  %add.2 = add i32 %add.1, %pre.next
  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
  store i32 %add.2, i32* %idx.2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 2000
  br i1 %exitcond, label %exit, label %for

exit:
  ret void
}

; Some kinds of reductions are not detected by IVDescriptors. If we have a
; cycle, we cannot sink it.
define void @cannot_sink_reduction(i32 %x, i32* %ptr, i64 %tc) {
; CHECK-LABEL: define void @cannot_sink_reduction(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label %preheader

; CHECK-LABEL: preheader:                                        ; preds = %entry
; CHECK:         br label %for

; CHECK-LABEL: for:                                              ; preds = %for, %preheader
; CHECK:         br i1 %exitcond, label %exit, label %for

; CHECK-LABEL: exit:                                    ; preds = %for
; CHECK-NEXT:    ret void
;
; The expected output above is the original scalar control flow: the loop
; still branches on %exitcond back to %for, and %exit only returns.
entry:
  br label %preheader

preheader:
  ; Initial value of %pre.phi: the element of @p at index 1.
  %idx.phi.trans = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 1
  %.pre = load i32, i32* %idx.phi.trans, align 4
  br label %for

for:
  ; %pre.phi and %d form a cycle: %d is computed from %pre.phi and is also the
  ; value %pre.phi takes on the back edge.
  %pre.phi = phi i32 [ %.pre, %preheader ], [ %d, %for ]
  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
  %d = sdiv i32 %pre.phi, %x
  %idx.1 = getelementptr inbounds [257 x i32], [257 x i32]* @p, i64 0, i64 %iv
  %pre.next = load i32, i32* %idx.1, align 4
  %add.2 = add i32 %x, %pre.next
  %idx.2 = getelementptr inbounds [257 x i32], [257 x i32]* @q, i64 0, i64 %iv
  store i32 %add.2, i32* %idx.2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 2000
  br i1 %exitcond, label %exit, label %for

exit:
  ret void
}

; TODO: We should be able to sink %tmp38 after %tmp60.
define void @instruction_with_2_FOR_operands() {
; CHECK-LABEL: define void @instruction_with_2_FOR_operands(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    br label %bb13

; CHECK-LABEL: bb13:
; CHECK:         br i1 %tmp12, label %bb13, label %bb74

; CHECK-LABEL: bb74:
; CHECK-NEXT:    ret void
;
; The expected output above is the original scalar control flow: the loop
; still branches on %tmp12 back to %bb13.
bb:
  br label %bb13

bb13:                                             ; preds = %bb13, %bb
  ; Two loop-carried phis: %tmp37 is fed by the load %tmp60 and %tmp27 by the
  ; load %tmp49.
  %tmp37 = phi float [ %tmp60, %bb13 ], [ undef, %bb ]
  %tmp27 = phi float [ %tmp49, %bb13 ], [ undef, %bb ]
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb13 ], [ 0, %bb ]
  ; %tmp38 uses both phis and is defined before both loads.
  %tmp38 = fmul fast float %tmp37, %tmp27
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %tmp49 = load float, float* undef, align 4
  %tmp60 = load float, float* undef, align 4
  %tmp12 = icmp slt i64 %indvars.iv, undef
  br i1 %tmp12, label %bb13, label %bb74

bb74:                                             ; preds = %bb13
  ret void
}

; Users that are phi nodes cannot be sunk.
define void @cannot_sink_phi(i32* %ptr) {
; CHECK-LABEL: define void @cannot_sink_phi(
; CHECK-NOT:   vector.body
entry:
  br label %loop.header

loop.header:                                      ; preds = %loop.latch, %entry
  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
  ; %for carries %for.next from the previous iteration.
  %for = phi i32 [ 0, %entry ], [ %for.next, %loop.latch ]
  %c.1 = icmp ult i64 %iv, 500
  br i1 %c.1, label %if.truebb, label %if.falsebb

if.truebb:                                        ; preds = %loop.header
  br label %loop.latch

if.falsebb:                                       ; preds = %loop.header
  br label %loop.latch

loop.latch:                                       ; preds = %if.falsebb, %if.truebb
  ; The user of the loop-carried phi %for is itself a phi node.
  %first_time.1 = phi i32 [ 20, %if.truebb ], [ %for, %if.falsebb ]
  %c.2 = icmp ult i64 %iv, 800
  %for.next = select i1 %c.2, i32 30, i32 %first_time.1
  %ptr.idx = getelementptr i32, i32* %ptr, i64 %iv
  store i32 %for.next, i32* %ptr.idx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1000
  br i1 %exitcond.not, label %exit, label %loop.header

exit:
  ret void
}