; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime < %s -S | FileCheck %s
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-and-jam-threshold=15 < %s -S | FileCheck %s --check-prefix=CHECK-LOWTHRES

; Checks how loop metadata ("pragmas") steers the unroll-and-jam pass.
;
; Every function below contains the same two-deep loop nest, guarded so that
; it is only entered when both %I and %J are non-zero:
;
;   for (i = 0; i != I; ++i) {
;     sum = 0;
;     for (j = 0; j != J; ++j)
;       sum += B[j];
;     A[i] = sum;
;   }
;
; The functions differ only in the !llvm.loop metadata attached to the outer
; loop's back-edge branch. Each test inspects the outer induction phi %i.us:
;   * incoming value %add8.us.<N> from a ".new" preheader -> loop was
;     unrolled and jammed;
;   * incoming value %add8.us from the original preheader  -> loop was left
;     untouched.
;
; Function labels are anchored on "@name(" because the bare function names are
; substrings of one another (e.g. "unroll" occurs inside "nounroll"). The
; low-threshold run carries its own labels so that its checks cannot be
; satisfied by a different function than the one they are written in.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; CHECK-LABEL: @test1(
; CHECK-LOWTHRES-LABEL: @test1(
; Basic check that these loops are by default UnJ'd
; (and that they are left alone in the second run, which lowers the threshold).
define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  ; Skip the whole nest unless both trip counts are non-zero.
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  ; sum += B[j]
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  ; A[i] = sum
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: @nounroll_and_jam(
; The next label only delimits the low-threshold checks of @test1.
; CHECK-LOWTHRES-LABEL: @nounroll_and_jam(
; #pragma nounroll_and_jam
define void @nounroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  ; !1 carries llvm.loop.unroll_and_jam.disable
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !1

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: @unroll_and_jam_count(
; #pragma unroll_and_jam(8)
define void @unroll_and_jam_count(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; A count of 8 means the back-edge value is the eighth copy, suffixed ".7".
; CHECK: %i.us = phi i32 [ %add8.us.7, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  ; !3 carries llvm.loop.unroll_and_jam.count = 8
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !3

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: @unroll_and_jam(
; CHECK-LOWTHRES-LABEL: @unroll_and_jam(
; #pragma unroll_and_jam
; The explicit enable request is expected to win even in the low-threshold run.
define void @unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  ; !5 carries llvm.loop.unroll_and_jam.enable
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !5

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: @nounroll(
; The next label only delimits the low-threshold checks of @unroll_and_jam.
; CHECK-LOWTHRES-LABEL: @nounroll(
; #pragma nounroll (which we take to mean disable unroll and jam too)
define void @nounroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  ; !7 carries llvm.loop.unroll.disable
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !7

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: @unroll(
; #pragma unroll (which we take to mean disable unroll and jam)
define void @unroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  ; !9 carries llvm.loop.unroll.enable
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !9

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; CHECK-LABEL: @nounroll_plus_unroll_and_jam(
; #pragma clang loop nounroll, unroll_and_jam (which we take to mean do unroll_and_jam)
define void @nounroll_plus_unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
; CHECK: %i.us = phi i32 [ %add8.us.{{[0-9]+}}, %for.latch ], [ 0, %for.outer.preheader.new ]
entry:
  %cmp = icmp ne i32 %J, 0
  %cmp122 = icmp ne i32 %I, 0
  %or.cond = and i1 %cmp, %cmp122
  br i1 %or.cond, label %for.outer.preheader, label %for.end

for.outer.preheader:
  br label %for.outer

for.outer:
  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
  br label %for.inner

for.inner:
  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
  %0 = load i32, i32* %arrayidx.us, align 4
  %add.us = add i32 %0, %sum1.us
  %inc.us = add nuw i32 %j.us, 1
  %exitcond = icmp eq i32 %inc.us, %J
  br i1 %exitcond, label %for.latch, label %for.inner

for.latch:
  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
  %add8.us = add nuw i32 %i.us, 1
  %exitcond25 = icmp eq i32 %add8.us, %I
  ; !11 carries both llvm.loop.unroll.disable and llvm.loop.unroll_and_jam.enable
  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !11

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}


; Loop metadata. Odd-numbered nodes are the self-referential loop IDs attached
; to the back-edge branches above; even-numbered nodes are the hints they carry.
!1 = distinct !{!1, !2}
!2 = distinct !{!"llvm.loop.unroll_and_jam.disable"}
!3 = distinct !{!3, !4}
!4 = distinct !{!"llvm.loop.unroll_and_jam.count", i32 8}
!5 = distinct !{!5, !6}
!6 = distinct !{!"llvm.loop.unroll_and_jam.enable"}
!7 = distinct !{!7, !8}
!8 = distinct !{!"llvm.loop.unroll.disable"}
!9 = distinct !{!9, !10}
!10 = distinct !{!"llvm.loop.unroll.enable"}
; Combines the unroll.disable (!8) and unroll_and_jam.enable (!6) hints.
!11 = distinct !{!11, !8, !6}