; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.
;
; Layout: every test function is preceded by the pragma it models and the
; FileCheck expectations for it, and is followed by the loop metadata nodes
; it introduces.  Unroll hints are attached to the loop back-edge branch in
; %for.body.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics.  It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
; Same loop as loop4, but the pragma must keep it rolled: exactly one store
; and the conditional back-edge branch are expected to remain.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID nodes are self-referential in their first operand; the remaining
; operand is the unroll hint.
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic.  It serves as the control for the
; loop64_with_.* pragma tests below.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
entry:
  ; Zero-trip guard only; %entry is not a loop latch, so it carries no
  ; loop metadata.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Reuses the unroll.full hint node !4 under a distinct loop ID.
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count.  Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body:
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
  ; Zero-trip guard only; %entry is not a loop latch, so it carries no
  ; loop metadata.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Reuses the unroll.count(4) hint node !6 under a distinct loop ID.
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled.
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}
; #pragma clang loop unroll(full)
; Loop has very high loop count (1 million) and full unrolling was requested.
; Loop should be unrolled up to the pragma threshold, but not completely.
; The expectations below only require at least two stores followed by a
; remaining conditional branch.
;
; CHECK-LABEL: @unroll_1M(
; CHECK: store i32
; CHECK: store i32
; CHECK: br i1
define void @unroll_1M(i32* nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID whose second operand is node !4, which is defined earlier in
; this file.
!12 = !{!12, !4}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(i32* nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13

for.end:                                          ; preds = %for.body
  ret void
}
; Loop ID nodes are self-referential in their first operand; the remaining
; operand is the unroll hint.
!13 = !{!13, !14}
!14 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x).
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
  ; Zero-trip guard only; %entry is not a loop latch, so it carries no
  ; loop metadata.  The unroll.enable hint sits on the back-edge below.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
; Reuses the unroll.enable hint node !14 under a distinct loop ID.
!15 = !{!15, !14}