; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s
;
; Exercises loop-idiom recognition on byte/element copy loops whose load and/or
; store is an unordered atomic access. Per the directives below, such loops are
; expected to become a call to llvm.memcpy.element.unordered.atomic when the
; access alignment matches the element size and the element is at most 16
; bytes, and to be left as plain loops otherwise (under-aligned access,
; monotonic ordering, 32-byte elements). Atomic stores of a constant must not
; be turned into memset / memset_pattern.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"

;; memcpy.atomic formation (atomic load & store)
define void @test1(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test1(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic store, normal load)
define void @test2(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014, align 1
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic store, normal load w/ no align)
define void @test2b(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2b(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load i8, i8* %I.0.014
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
;; The i32 load is only 2-byte aligned, below the 4-byte element size.
define void @test2c(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2c(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 2
  store atomic i32 %V, i32* %DestI unordered, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
;; The i32 atomic store is only 2-byte aligned, below the 4-byte element size.
define void @test2d(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test2d(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load i32, i32* %I.0.014, align 4
  store atomic i32 %V, i32* %DestI unordered, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}


;; memcpy.atomic formation (normal store, atomic load)
define void @test3(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store i8 %V, i8* %DestI, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (normal store w/ no align, atomic load)
;; The directives below expect the intrinsic to be formed for this case.
define void @test3b(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3b(
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store i8 %V, i8* %DestI
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
;; The i32 atomic load is only 2-byte aligned, below the 4-byte element size.
define void @test3c(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3c(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load atomic i32, i32* %I.0.014 unordered, align 2
  store i32 %V, i32* %DestI, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
;; The i32 store is only 2-byte aligned, below the 4-byte element size.
define void @test3d(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test3d(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load atomic i32, i32* %I.0.014 unordered, align 4
  store i32 %V, i32* %DestI, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}


;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
;; The store uses monotonic ordering rather than unordered.
define void @test4(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test4(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 unordered, align 1
  store atomic i8 %V, i8* %DestI monotonic, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
;; The load uses monotonic ordering rather than unordered.
define void @test5(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test5(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i8, i32 10000
  %Dest = alloca i8, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
  %V = load atomic i8, i8* %I.0.014 monotonic, align 1
  store atomic i8 %V, i8* %DestI unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 2
;; The byte length passed to the intrinsic is %Size << 1.
define void @test6(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 1
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 [[Sz]], i32 2)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i16, i32 10000
  %Dest = alloca i16, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
  %DestI = getelementptr i16, i16* %Dest, i64 %indvar
  %V = load atomic i16, i16* %I.0.014 unordered, align 2
  store atomic i16 %V, i16* %DestI unordered, align 2
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 4
;; The byte length passed to the intrinsic is %Size << 2.
define void @test7(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test7(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 2
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 [[Sz]], i32 4)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i32, i32 10000
  %Dest = alloca i32, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
  %V = load atomic i32, i32* %I.0.014 unordered, align 4
  store atomic i32 %V, i32* %DestI unordered, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 8
;; The byte length passed to the intrinsic is %Size << 3.
define void @test8(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test8(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 3
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 [[Sz]], i32 8)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i64, i32 10000
  %Dest = alloca i64, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i64, i64* %Base, i64 %indvar
  %DestI = getelementptr i64, i64* %Dest, i64 %indvar
  %V = load atomic i64, i64* %I.0.014 unordered, align 8
  store atomic i64 %V, i64* %DestI unordered, align 8
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation (atomic load & store) -- element size 16
;; The directives below expect the intrinsic to be formed for 16-byte
;; elements, with a byte length of %Size << 4.
define void @test9(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test9(
; CHECK: [[Sz:%[0-9]+]] = shl nuw i64 %Size, 4
; CHECK: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 [[Sz]], i32 16)
; CHECK-NOT: store
; CHECK: ret void
bb.nph:
  %Base = alloca i128, i32 10000
  %Dest = alloca i128, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i128, i128* %Base, i64 %indvar
  %DestI = getelementptr i128, i128* %Dest, i64 %indvar
  %V = load atomic i128, i128* %I.0.014 unordered, align 16
  store atomic i128 %V, i128* %DestI unordered, align 16
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
define void @test10(i64 %Size) nounwind ssp {
; CHECK-LABEL: @test10(
; CHECK-NOT: call void @llvm.memcpy.element.unordered.atomic
; CHECK: store
; CHECK: ret void
bb.nph:
  %Base = alloca i256, i32 10000
  %Dest = alloca i256, i32 10000
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i256, i256* %Base, i64 %indvar
  %DestI = getelementptr i256, i256* %Dest, i64 %indvar
  %V = load atomic i256, i256* %I.0.014 unordered, align 32
  store atomic i256 %V, i256* %DestI unordered, align 32
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}



; Make sure that atomic memset doesn't get recognized by mistake
define void @test_nomemset(i8* %Base, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test_nomemset(
; CHECK-NOT: call void @llvm.memset
; CHECK: store
; CHECK: ret void
bb.nph:
  br label %for.body

for.body:                                         ; preds = %bb.nph, %for.body
  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
  store atomic i8 0, i8* %I.0.014 unordered, align 1
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, %Size
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Verify that unordered memset_pattern isn't recognized.
; This is a replica of test11_pattern from basic.ll
define void @test_nomemset_pattern(i32* nocapture %P) nounwind ssp {
; CHECK-LABEL: @test_nomemset_pattern(
; CHECK-NEXT: entry:
; CHECK-NOT: bitcast
; CHECK-NOT: memset_pattern
; CHECK: store atomic
; CHECK: ret void
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
  store atomic i32 1, i32* %arrayidx unordered, align 4
  %indvar.next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}