; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
;
; The second invocation applies the unroller twice in a row to verify that the
; unrolling metadata is properly removed by the first application and that
; further unrolling is disabled afterwards. The third invocation forbids
; remainder loops and is checked with the NOREM prefix instead of REM.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Control case for loop4_with_disable (no pragma).
; A four-iteration loop that the default unrolling heuristics should unroll
; completely, so no conditional branch may remain.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0, 1, 2, 3.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 4
  br i1 %done, label %for.end, label %for.body

for.end:
  ret void
}

; #pragma clang loop unroll(disable)
; Same loop as loop4, but the latch carries llvm.loop.unroll.disable: exactly
; one store and the loop branch must survive.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0, 1, 2, 3.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret void
}
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; Control case for the loop64_with_* tests below (no pragma).
; The trip count of 64 is high enough that the default unrolling heuristic
; should *not* unroll the loop.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0 .. 63.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64
  br i1 %done, label %for.end, label %for.body

for.end:
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0 .. 63.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !3

for.end:
  ret void
}
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0 .. 63.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 64
  br i1 %done, label %for.end, label %for.body, !llvm.loop !5

for.end:
  ret void
}
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but the loop has a runtime trip count, so no
; unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Skip the loop entirely unless b > 0 (signed).
  ; NOTE(review): the loop ID is attached to this guard branch as well as to
  ; the latch. The guard is outside the loop, so this copy is presumably
  ; inert -- confirm before relying on it.
  %nonempty = icmp sgt i32 %b, 0
  br i1 %nonempty, label %for.body, label %for.end, !llvm.loop !8

for.body:
  ; Adds one to a[idx]; leaves once the truncated next index equals b.
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %idx.next.i32 = trunc i64 %idx.next to i32
  %done = icmp eq i32 %idx.next.i32, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !8

for.end:
  ret void
}
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count. If a remainder loop is allowed, runtime
; unrolling should occur and the loop should be duplicated (a 4x unrolled copy
; plus the original as epilogue); otherwise the loop should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
; REM: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Skip the loop entirely unless b > 0 (signed).
  ; NOTE(review): the loop ID is attached to this guard branch as well as to
  ; the latch. The guard is outside the loop, so this copy is presumably
  ; inert -- confirm before relying on it.
  %nonempty = icmp sgt i32 %b, 0
  br i1 %nonempty, label %for.body, label %for.end, !llvm.loop !9

for.body:
  ; Adds one to a[idx]; leaves once the truncated next index equals b.
  %idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %idx.next.i32 = trunc i64 %idx.next to i32
  %done = icmp eq i32 %idx.next.i32, %b
  br i1 %done, label %for.end, label %for.body, !llvm.loop !9

for.end:
  ret void
}
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled.
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0, 1, 2, 3. The %b argument is not used.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 4
  br i1 %done, label %for.end, label %for.body, !llvm.loop !10

for.end:
  ret void
}
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(full)
; Loop has a very high trip count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[idx] for idx = 0 .. 999999. The %b argument is not used.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  %slot = getelementptr inbounds i32, i32* %a, i64 %idx
  %old = load i32, i32* %slot, align 4
  %bumped = add nsw i32 %old, 1
  store i32 %bumped, i32* %slot, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000000
  br i1 %done, label %for.end, label %for.body, !llvm.loop !12

for.end:
  ret void
}
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:
  ; Adds one to a[i] for i = 0 .. 63.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:
  ret void
}
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x) if remainder is allowed, otherwise it should not be
; unrolled.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; REM: for.body.epil:
; NOREM-NOT: for.body.epil:
; REM: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Skip the loop entirely unless b > 0 (signed). The guard carries the same
  ; loop ID as the latch below, matching the other runtime-trip-count tests,
  ; so this test does not reference the unroll(full) test's metadata node.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !15

for.body:
  ; Adds one to a[i]; leaves once the truncated next index equals b.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:
  ret void
}
!15 = !{!15, !14}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled) if remainder is allowed,
; otherwise it should not be unrolled.
;
; As in the count(4) and enable variants, the run without remainder loops
; also requires that no conditional branch follows the loop latch.
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
  ; Skip the loop entirely unless b > 0 (signed).
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16

for.body:
  ; Adds one to a[i]; leaves once the truncated next index equals b.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16

for.end:
  ret void
}
!16 = !{!16, !17}
!17 = !{!"llvm.loop.unroll.count", i32 3}