; RUN: opt -S -loop-sink < %s | FileCheck %s
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s

; Regression tests for the loop-sink pass.
;
; Every function below loads a loop-invariant value in its loop preheader and
; uses it in one or more blocks of the loop.  Branch-weight metadata (!1, !2,
; !3 at the end of the file) makes some of those blocks much colder than the
; preheader.  Each test states the block frequencies its author expects from
; those weights and whether the load should move into the cold block(s).
;
; The expectations for each function are anchored on the function's own
; definition line ("@name("), so a pattern written for one function can never
; be satisfied by, or leak into, the output of a neighbouring function.

; Ordinary mutable global: a store or call inside the loop may clobber it.
@g = global i32 0, align 4
; Constant global: nothing inside the loop can change the loaded value.
@g_const = constant i32 0, align 4

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; The load is used in b3 and b4, both reached only through the cold block b2.
; Sink load to b2.
; CHECK-LABEL: @t1(
; CHECK: .b2:
; CHECK: load i32, i32* @g
; CHECK: .b3:
; CHECK-NOT: load i32, i32* @g
define i32 @t1(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 100
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b1: 16016
; b3: 8
; b6: 8
; The load is used in the two cold blocks b3 and b6; b4 does not use it.
; Sink load to b3 and b6.
; CHECK-LABEL: @t2(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b3:
; CHECK: load i32, i32* @g
; CHECK: .b4:
; CHECK: .b6:
; CHECK: load i32, i32* @g
; CHECK: .b7:
define i32 @t2(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, %invariant
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b3: 8
; b5: 16008
; The load is used in b3 (cold) and in b5, which is hotter than the preheader.
; Do not sink load from preheader.
; CHECK-LABEL: @t3(
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t3(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, %invariant
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 5
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; For a single-block loop with an average trip count <= 1 (the back edge in
; b1 carries the 1:2000 weights of !1), sink load to b1.
; CHECK-LABEL: @t4(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g
; CHECK: .b1:
; CHECK: load i32, i32* @g
; CHECK: .exit:
define i32 @t4(i32, i32) #0 !prof !0 {
.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t1, %.b1 ], [ 0, %.preheader ]
  %t1 = add nsw i32 %invariant, %iv
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b1, label %.exit, !prof !1

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Same shape as @t1, but b6 calls @foo, which may write to @g.  Because the
; loop contains a possibly aliasing write, do not sink load.
; CHECK-LABEL: @t5(
; CHECK: .preheader:
; CHECK: load i32, i32* @g
; CHECK: .b1:
; CHECK-NOT: load i32, i32* @g
define i32 @t5(i32, i32*) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Same as @t5, except that the load reads the constant global @g_const.
; Regardless of the possibly aliasing write in the loop, a load from constant
; memory can be sunk (here into b2, ahead of its conditional branch).
; CHECK-LABEL: @t5_const_memory(
; CHECK: .preheader:
; CHECK-NOT: load i32, i32* @g_const
; CHECK: .b2:
; CHECK: load i32, i32* @g_const
; CHECK: br i1 %c2, label %.b3, label %.b4
define i32 @t5_const_memory(i32, i32*) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, i32* @g_const
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 15
; b3: 7
; The load is an unordered atomic load from the mutable global @g.
; Do not sink unordered atomic load to b2.
; CHECK-LABEL: @t6(
; CHECK: .preheader:
; CHECK: load atomic i32, i32* @g unordered, align 4
; CHECK: .b2:
; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
define i32 @t6(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, i32* @g unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 0.5
; b3: 999.5
; Sink unordered atomic load to b2.  Sinking an unordered atomic load into the
; loop is allowed when it reads from a constant.
; CHECK-LABEL: @t7(
; CHECK: .preheader:
; CHECK-NOT: load atomic i32, i32* @g_const unordered, align 4
; CHECK: .b2:
; CHECK: load atomic i32, i32* @g_const unordered, align 4
define i32 @t7(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, i32* @g_const unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; External function with no memory attributes; used by @t5 and
; @t5_const_memory as an operation that may write to @g.
declare i32 @foo()

; Profile metadata shared by all tests.
; !0: function entry count of 1.
!0 = !{!"function_entry_count", i64 1}
; !1: first successor taken 1 time for every 2000 of the second (first is cold).
!1 = !{!"branch_weights", i32 1, i32 2000}
; !2: first successor taken 2000 times for every 1 of the second (second is cold).
!2 = !{!"branch_weights", i32 2000, i32 1}
; !3: loop back edge taken 100 times for every 1 exit.
!3 = !{!"branch_weights", i32 100, i32 1}