; RUN: opt -S -loop-fusion < %s | FileCheck %s

@B = common global [1024 x i32] zeroinitializer, align 16

; Two guarded loops that share the same guard condition (0 < %N) and the same
; iteration space. The first loop stores to %A, the second stores to @B.
; The expectations below describe a single loop body that branches back to
; itself and leaves through one exit block to the guard's other successor.

; CHECK: void @dep_free_parametric
; CHECK-NEXT: entry:
; CHECK: br i1 %{{.*}}, label %[[LOOP1PREHEADER:bb[0-9]*]], label %[[LOOP1SUCC:bb[0-9]+]]
; CHECK: [[LOOP1PREHEADER]]
; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]]
; CHECK: [[LOOP1BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]]
; CHECK: [[LOOP2EXIT]]
; CHECK: br label %[[LOOP1SUCC]]
; CHECK: [[LOOP1SUCC]]
; CHECK: ret void
define void @dep_free_parametric(i32* noalias %A, i64 %N) {
entry:
  ; Guard of the first loop.
  %cmp4 = icmp slt i64 0, %N
  br i1 %cmp4, label %bb3, label %bb14

bb3:                                              ; preds = %entry
  br label %bb5

; First loop: A[i] = trunc(((i - 3) * (i + 3)) srem i)
bb5:                                              ; preds = %bb3, %bb5
  %i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ]
  %sub = sub nsw i64 %i.05, 3
  %add = add nsw i64 %i.05, 3
  %mul = mul nsw i64 %sub, %add
  %rem = srem i64 %mul, %i.05
  %conv = trunc i64 %rem to i32
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.05
  store i32 %conv, i32* %arrayidx, align 4
  %inc = add nsw i64 %i.05, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %bb5, label %bb10

bb10:                                             ; preds = %bb5
  br label %bb14

; Guard of the second loop; it recomputes the same comparison as the entry
; block.
bb14:                                             ; preds = %bb10, %entry
  %cmp31 = icmp slt i64 0, %N
  br i1 %cmp31, label %bb8, label %bb12

bb8:                                              ; preds = %bb14
  br label %bb9

; Second loop: same computation as the first loop, stored to @B instead of %A.
bb9:                                              ; preds = %bb8, %bb9
  %i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ]
  %sub7 = sub nsw i64 %i1.02, 3
  %add8 = add nsw i64 %i1.02, 3
  %mul9 = mul nsw i64 %sub7, %add8
  %rem10 = srem i64 %mul9, %i1.02
  %conv11 = trunc i64 %rem10 to i32
  %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.02
  store i32 %conv11, i32* %arrayidx12, align 4
  %inc14 = add nsw i64 %i1.02, 1
  %cmp3 = icmp slt i64 %inc14, %N
  br i1 %cmp3, label %bb9, label %bb15

bb15:                                             ; preds = %bb9
  br label %bb12

bb12:                                             ; preds = %bb15, %bb14
  ret void
}

; Test that `%add` is moved
; into for.first.preheader, and the two loops for.first
; and for.second are fused.

; CHECK: void @moveinsts_preheader
; CHECK-LABEL: for.first.guard:
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
; CHECK-LABEL: for.first.preheader:
; CHECK-NEXT:  %add = add nsw i32 %x, 1
; CHECK-NEXT:  br label %for.first
; CHECK-LABEL: for.first:
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
; CHECK-LABEL: for.second.exit:
; CHECK-NEXT: br label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: ret void
define void @moveinsts_preheader(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
for.first.guard:
  ; Single guard value; both loops branch on it.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

; First loop: A[i] = 0 for i in [0, N).
for.first:
  %i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
  %Ai = getelementptr inbounds i32, i32* %A, i64 %i
  store i32 0, i32* %Ai, align 4
  %inc.i = add nsw i64 %i, 1
  %cmp.i = icmp slt i64 %inc.i, %N
  br i1 %cmp.i, label %for.first, label %for.first.exit

for.first.exit:
  br label %for.second.guard

for.second.guard:
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  ; Depends only on the argument %x; this is the instruction the
  ; expectations above look for in for.first.preheader.
  %add = add nsw i32 %x, 1
  br label %for.second

; Second loop: B[j] = 0 for j in [0, N).
for.second:
  %j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
  %Bj = getelementptr inbounds i32, i32* %B, i64 %j
  store i32 0, i32* %Bj, align 4
  %inc.j = add nsw i64 %j, 1
  %cmp.j = icmp slt i64 %inc.j, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ret void
}

; Test that `%add` is moved into for.second.exit, and the two loops for.first
; and for.second are fused.

; CHECK: void @moveinsts_exitblock
; CHECK-LABEL: for.first.guard:
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
; CHECK-LABEL: for.first.preheader:
; CHECK-NEXT:  br label %for.first
; CHECK-LABEL: for.first:
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
; CHECK-LABEL: for.second.exit:
; CHECK-NEXT:  %add = add nsw i32 %x, 1
; CHECK-NEXT: br label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: ret void
define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
for.first.guard:
  ; Single guard value; both loops branch on it.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

; First loop: A[i] = 0 for i in [0, N).
for.first:
  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
  store i32 0, i32* %arrayidx, align 4
  %inc = add nsw i64 %i.04, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %for.first, label %for.first.exit

for.first.exit:
  ; Depends only on the argument %x; this is the instruction the
  ; expectations above look for in for.second.exit.
  %add = add nsw i32 %x, 1
  br label %for.second.guard

for.second.guard:
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  br label %for.second

; Second loop: B[j] = 0 for j in [0, N).
for.second:
  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
  store i32 0, i32* %arrayidx4, align 4
  %inc6 = add nsw i64 %j.02, 1
  %cmp.j = icmp slt i64 %inc6, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ret void
}

; Test that `%add` is moved into for.first.guard, and the two loops for.first
; and for.second are fused.

; CHECK: void @moveinsts_guardblock
; CHECK-LABEL: for.first.guard:
; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
; CHECK-NEXT:  %add = add nsw i32 %x, 1
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
; CHECK-LABEL: for.first.preheader:
; CHECK-NEXT:  br label %for.first
; CHECK-LABEL: for.first:
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
; CHECK-LABEL: for.second.exit:
; CHECK-NEXT: br label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: ret void
define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
for.first.guard:
  ; Single guard value; both loops branch on it.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

; First loop: A[i] = 0 for i in [0, N).
for.first:
  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
  store i32 0, i32* %arrayidx, align 4
  %inc = add nsw i64 %i.04, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %for.first, label %for.first.exit

for.first.exit:
  br label %for.second.guard

for.second.guard:
  ; Depends only on the argument %x; this is the instruction the
  ; expectations above look for in for.first.guard, right after %cmp.guard.
  %add = add nsw i32 %x, 1
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  br label %for.second

; Second loop: B[j] = 0 for j in [0, N).
for.second:
  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
  store i32 0, i32* %arrayidx4, align 4
  %inc6 = add nsw i64 %j.02, 1
  %cmp.j = icmp slt i64 %inc6, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ret void
}

; Test that the incoming block of `%j.lcssa` is updated correctly
; from for.second.guard to for.first.guard, and the two loops for.first and
; for.second are fused.

; CHECK: i64 @updatephi_guardnonloopblock
; CHECK-LABEL: for.first.guard:
; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N
; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end
; CHECK-LABEL: for.first.preheader:
; CHECK-NEXT:  br label %for.first
; CHECK-LABEL: for.first:
; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit
; CHECK-LABEL: for.second.exit:
; CHECK-NEXT: br label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: %j.lcssa = phi i64 [ 0, %for.first.guard ], [ %j.02, %for.second.exit ]
; CHECK-NEXT: ret i64 %j.lcssa

define i64 @updatephi_guardnonloopblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) {
for.first.guard:
  ; Single guard value; both loops branch on it.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

; First loop: A[i] = 0 for i in [0, N).
for.first:
  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
  store i32 0, i32* %arrayidx, align 4
  %inc = add nsw i64 %i.04, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %for.first, label %for.first.exit

for.first.exit:
  br label %for.second.guard

for.second.guard:
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  br label %for.second

; Second loop: B[j] = 0 for j in [0, N).
for.second:
  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
  store i32 0, i32* %arrayidx4, align 4
  %inc6 = add nsw i64 %j.02, 1
  %cmp.j = icmp slt i64 %inc6, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ; The constant 0 arrives from the second loop's guard in the input; the
  ; expectations above require that edge to be attributed to for.first.guard
  ; in the output.
  %j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ]
  ret i64 %j.lcssa
}