1; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s 2target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" 3target triple = "x86_64-apple-darwin10.0.0" 4 5define void @test1(i8* %Base, i64 %Size) nounwind ssp { 6bb.nph: ; preds = %entry 7 br label %for.body 8 9for.body: ; preds = %bb.nph, %for.body 10 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 11 %I.0.014 = getelementptr i8* %Base, i64 %indvar 12 store i8 0, i8* %I.0.014, align 1 13 %indvar.next = add i64 %indvar, 1 14 %exitcond = icmp eq i64 %indvar.next, %Size 15 br i1 %exitcond, label %for.end, label %for.body 16 17for.end: ; preds = %for.body, %entry 18 ret void 19; CHECK: @test1 20; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 21; CHECK-NOT: store 22} 23 24; This is a loop that was rotated but where the blocks weren't merged. This 25; shouldn't perturb us. 26define void @test1a(i8* %Base, i64 %Size) nounwind ssp { 27bb.nph: ; preds = %entry 28 br label %for.body 29 30for.body: ; preds = %bb.nph, %for.body 31 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] 32 %I.0.014 = getelementptr i8* %Base, i64 %indvar 33 store i8 0, i8* %I.0.014, align 1 34 %indvar.next = add i64 %indvar, 1 35 br label %for.body.cont 36for.body.cont: 37 %exitcond = icmp eq i64 %indvar.next, %Size 38 br i1 %exitcond, label %for.end, label %for.body 39 40for.end: ; preds = %for.body, %entry 41 ret void 42; CHECK: @test1a 43; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 44; CHECK-NOT: store 45} 46 47 48define void @test2(i32* %Base, i64 %Size) nounwind ssp { 49entry: 50 %cmp10 = icmp eq i64 %Size, 0 51 br i1 %cmp10, label %for.end, label %for.body 52 53for.body: ; preds = %entry, %for.body 54 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ] 55 %add.ptr.i = getelementptr i32* %Base, i64 %i.011 56 store i32 16843009, i32* %add.ptr.i, align 4 57 %inc = add nsw i64 %i.011, 1 58 %exitcond = icmp eq i64 %inc, %Size 59 br i1 %exitcond, label %for.end, label %for.body 60 61for.end: ; preds = %for.body, %entry 62 ret void 63; CHECK: @test2 64; CHECK: br i1 %cmp10, 65; CHECK: %0 = mul i64 %Size, 4 66; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false) 67; CHECK-NOT: store 68} 69 70; This is a case where there is an extra may-aliased store in the loop, we can't 71; promote the memset. 72define void @test3(i32* %Base, i64 %Size, i8 *%MayAlias) nounwind ssp { 73entry: 74 br label %for.body 75 76for.body: ; preds = %entry, %for.body 77 %i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ] 78 %add.ptr.i = getelementptr i32* %Base, i64 %i.011 79 store i32 16843009, i32* %add.ptr.i, align 4 80 81 store i8 42, i8* %MayAlias 82 %inc = add nsw i64 %i.011, 1 83 %exitcond = icmp eq i64 %inc, %Size 84 br i1 %exitcond, label %for.end, label %for.body 85 86for.end: ; preds = %entry 87 ret void 88; CHECK: @test3 89; CHECK-NOT: memset 90; CHECK: ret void 91} 92 93 94;; TODO: We should be able to promote this memset. Not yet though. 95define void @test4(i8* %Base) nounwind ssp { 96bb.nph: ; preds = %entry 97 %Base100 = getelementptr i8* %Base, i64 1000 98 br label %for.body 99 100for.body: ; preds = %bb.nph, %for.body 101 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 102 %I.0.014 = getelementptr i8* %Base, i64 %indvar 103 store i8 0, i8* %I.0.014, align 1 104 105 ;; Store beyond the range memset, should be safe to promote. 106 store i8 42, i8* %Base100 107 108 %indvar.next = add i64 %indvar, 1 109 %exitcond = icmp eq i64 %indvar.next, 100 110 br i1 %exitcond, label %for.end, label %for.body 111 112for.end: ; preds = %for.body, %entry 113 ret void 114; CHECK-TODO: @test4 115; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false) 116; CHECK-TODO-NOT: store 117} 118 119; This can't be promoted: the memset is a store of a loop variant value. 120define void @test5(i8* %Base, i64 %Size) nounwind ssp { 121bb.nph: ; preds = %entry 122 br label %for.body 123 124for.body: ; preds = %bb.nph, %for.body 125 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 126 %I.0.014 = getelementptr i8* %Base, i64 %indvar 127 128 %V = trunc i64 %indvar to i8 129 store i8 %V, i8* %I.0.014, align 1 130 %indvar.next = add i64 %indvar, 1 131 %exitcond = icmp eq i64 %indvar.next, %Size 132 br i1 %exitcond, label %for.end, label %for.body 133 134for.end: ; preds = %for.body, %entry 135 ret void 136; CHECK: @test5 137; CHECK-NOT: memset 138; CHECK: ret void 139} 140 141 142;; memcpy formation 143define void @test6(i64 %Size) nounwind ssp { 144bb.nph: 145 %Base = alloca i8, i32 10000 146 %Dest = alloca i8, i32 10000 147 br label %for.body 148 149for.body: ; preds = %bb.nph, %for.body 150 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 151 %I.0.014 = getelementptr i8* %Base, i64 %indvar 152 %DestI = getelementptr i8* %Dest, i64 %indvar 153 %V = load i8* %I.0.014, align 1 154 store i8 %V, i8* %DestI, align 1 155 %indvar.next = add i64 %indvar, 1 156 %exitcond = icmp eq i64 %indvar.next, %Size 157 br i1 %exitcond, label %for.end, label %for.body 158 159for.end: ; preds = %for.body, %entry 160 ret void 161; CHECK: @test6 162; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false) 163; CHECK-NOT: store 164; CHECK: ret void 165} 166 167 168; This is a loop that was rotated but where the blocks weren't merged. This 169; shouldn't perturb us. 170define void @test7(i8* %Base, i64 %Size) nounwind ssp { 171bb.nph: ; preds = %entry 172 br label %for.body 173 174for.body: ; preds = %bb.nph, %for.body 175 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body.cont ] 176 br label %for.body.cont 177for.body.cont: 178 %I.0.014 = getelementptr i8* %Base, i64 %indvar 179 store i8 0, i8* %I.0.014, align 1 180 %indvar.next = add i64 %indvar, 1 181 %exitcond = icmp eq i64 %indvar.next, %Size 182 br i1 %exitcond, label %for.end, label %for.body 183 184for.end: ; preds = %for.body, %entry 185 ret void 186; CHECK: @test7 187; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false) 188; CHECK-NOT: store 189} 190 191; This is a loop should not be transformed, it only executes one iteration. 192define void @test8(i64* %Ptr, i64 %Size) nounwind ssp { 193bb.nph: ; preds = %entry 194 br label %for.body 195 196for.body: ; preds = %bb.nph, %for.body 197 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 198 %PI = getelementptr i64* %Ptr, i64 %indvar 199 store i64 0, i64 *%PI 200 %indvar.next = add i64 %indvar, 1 201 %exitcond = icmp eq i64 %indvar.next, 1 202 br i1 %exitcond, label %for.end, label %for.body 203 204for.end: ; preds = %for.body, %entry 205 ret void 206; CHECK: @test8 207; CHECK: store i64 0, i64* %PI 208} 209 210declare i8* @external(i8*) 211 212;; This cannot be transformed into a memcpy, because the read-from location is 213;; mutated by the loop. 214define void @test9(i64 %Size) nounwind ssp { 215bb.nph: 216 %Base = alloca i8, i32 10000 217 %Dest = alloca i8, i32 10000 218 219 %BaseAlias = call i8* @external(i8* %Base) 220 br label %for.body 221 222for.body: ; preds = %bb.nph, %for.body 223 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] 224 %I.0.014 = getelementptr i8* %Base, i64 %indvar 225 %DestI = getelementptr i8* %Dest, i64 %indvar 226 %V = load i8* %I.0.014, align 1 227 store i8 %V, i8* %DestI, align 1 228 229 ;; This store can clobber the input. 230 store i8 4, i8* %BaseAlias 231 232 %indvar.next = add i64 %indvar, 1 233 %exitcond = icmp eq i64 %indvar.next, %Size 234 br i1 %exitcond, label %for.end, label %for.body 235 236for.end: ; preds = %for.body, %entry 237 ret void 238; CHECK: @test9 239; CHECK-NOT: llvm.memcpy 240; CHECK: ret void 241} 242 243; Two dimensional nested loop should be promoted to one big memset. 244define void @test10(i8* %X) nounwind ssp { 245entry: 246 br label %bb.nph 247 248bb.nph: ; preds = %entry, %for.inc10 249 %i.04 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ] 250 br label %for.body5 251 252for.body5: ; preds = %for.body5, %bb.nph 253 %j.02 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body5 ] 254 %mul = mul nsw i32 %i.04, 100 255 %add = add nsw i32 %j.02, %mul 256 %idxprom = sext i32 %add to i64 257 %arrayidx = getelementptr inbounds i8* %X, i64 %idxprom 258 store i8 0, i8* %arrayidx, align 1 259 %inc = add nsw i32 %j.02, 1 260 %cmp4 = icmp eq i32 %inc, 100 261 br i1 %cmp4, label %for.inc10, label %for.body5 262 263for.inc10: ; preds = %for.body5 264 %inc12 = add nsw i32 %i.04, 1 265 %cmp = icmp eq i32 %inc12, 100 266 br i1 %cmp, label %for.end13, label %bb.nph 267 268for.end13: ; preds = %for.inc10 269 ret void 270; CHECK: @test10 271; CHECK: entry: 272; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false) 273; CHECK-NOT: store 274; CHECK: ret void 275} 276 277; On darwin10 (which is the triple in this .ll file) this loop can be turned 278; into a memset_pattern call. 279; rdar://9009151 280define void @test11_pattern(i32* nocapture %P) nounwind ssp { 281entry: 282 br label %for.body 283 284for.body: ; preds = %entry, %for.body 285 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 286 %arrayidx = getelementptr i32* %P, i64 %indvar 287 store i32 1, i32* %arrayidx, align 4 288 %indvar.next = add i64 %indvar, 1 289 %exitcond = icmp eq i64 %indvar.next, 10000 290 br i1 %exitcond, label %for.end, label %for.body 291 292for.end: ; preds = %for.body 293 ret void 294; CHECK: @test11_pattern 295; CHECK-NEXT: entry: 296; CHECK-NEXT: bitcast 297; CHECK-NEXT: memset_pattern 298; CHECK-NOT: store 299; CHECK: ret void 300} 301 302; Store of null should turn into memset of zero. 303define void @test12(i32** nocapture %P) nounwind ssp { 304entry: 305 br label %for.body 306 307for.body: ; preds = %entry, %for.body 308 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 309 %arrayidx = getelementptr i32** %P, i64 %indvar 310 store i32* null, i32** %arrayidx, align 4 311 %indvar.next = add i64 %indvar, 1 312 %exitcond = icmp eq i64 %indvar.next, 10000 313 br i1 %exitcond, label %for.end, label %for.body 314 315for.end: ; preds = %for.body 316 ret void 317; CHECK: @test12 318; CHECK-NEXT: entry: 319; CHECK-NEXT: bitcast 320; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false) 321; CHECK-NOT: store 322; CHECK: ret void 323} 324 325@G = global i32 5 326 327; This store-of-address loop can be turned into a memset_pattern call. 328; rdar://9009151 329define void @test13_pattern(i32** nocapture %P) nounwind ssp { 330entry: 331 br label %for.body 332 333for.body: ; preds = %entry, %for.body 334 %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] 335 %arrayidx = getelementptr i32** %P, i64 %indvar 336 store i32* @G, i32** %arrayidx, align 4 337 %indvar.next = add i64 %indvar, 1 338 %exitcond = icmp eq i64 %indvar.next, 10000 339 br i1 %exitcond, label %for.end, label %for.body 340 341for.end: ; preds = %for.body 342 ret void 343; CHECK: @test13_pattern 344; CHECK-NEXT: entry: 345; CHECK-NEXT: bitcast 346; CHECK-NEXT: memset_pattern 347; CHECK-NOT: store 348; CHECK: ret void 349} 350 351 352 353; PR9815 - This is a partial overlap case that cannot be safely transformed 354; into a memcpy. 355@g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16 356 357define i32 @test14() nounwind { 358entry: 359 br label %for.body 360 361for.body: ; preds = %for.inc, %for.body.lr.ph 362 %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ] 363 %add = add nsw i32 %tmp5, 4 364 %idxprom = sext i32 %add to i64 365 %arrayidx = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom 366 %tmp2 = load i32* %arrayidx, align 4 367 %add4 = add nsw i32 %tmp5, 5 368 %idxprom5 = sext i32 %add4 to i64 369 %arrayidx6 = getelementptr inbounds [7 x i32]* @g_50, i32 0, i64 %idxprom5 370 store i32 %tmp2, i32* %arrayidx6, align 4 371 %inc = add nsw i32 %tmp5, 1 372 %cmp = icmp slt i32 %inc, 2 373 br i1 %cmp, label %for.body, label %for.end 374 375for.end: ; preds = %for.inc 376 %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4 377 ret i32 %tmp8 378; CHECK: @test14 379; CHECK: for.body: 380; CHECK: load i32 381; CHECK: store i32 382; CHECK: br i1 %cmp 383 384} 385 386 387