; RUN: opt < %s -memcpyopt -S | FileCheck %s

; All the stores in this example should be merged into a single memset.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"

define void @test1(i8 signext %c) nounwind {
entry:
  %x = alloca [19 x i8]         ; <[19 x i8]*> [#uses=20]
  %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0           ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp, align 1
  %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1          ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp5, align 1
  %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2          ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp9, align 1
  %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp13, align 1
  %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp17, align 1
  %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp21, align 1
  %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp25, align 1
  %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp29, align 1
  %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp33, align 1
  %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9         ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp37, align 1
  %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp41, align 1
  %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp45, align 1
  %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp49, align 1
  %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp53, align 1
  %tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp57, align 1
  %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp61, align 1
  %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp65, align 1
  %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp69, align 1
  %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18                ; <i8*> [#uses=1]
  store i8 %c, i8* %tmp73, align 1
  %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind
  ret void
; CHECK-LABEL: @test1(
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64
; CHECK-NOT: store
; CHECK: ret
}

declare i32 @bar(...)

%struct.MV = type { i16, i16 }


define void @test2() nounwind {
entry:
  %ref_idx = alloca [8 x i8]            ; <[8 x i8]*> [#uses=8]
  %left_mvd = alloca [8 x %struct.MV]           ; <[8 x %struct.MV]*> [#uses=17]
  %up_mvd = alloca [8 x %struct.MV]             ; <[8 x %struct.MV]*> [#uses=17]
  %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp20, align 1
  %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp23, align 1
  %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp26, align 1
  %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp29, align 1
  %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp32, align 1
  %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp35, align 1
  %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1             ; <i8*> [#uses=1]
  store i8 -1, i8* %tmp38, align 1
  %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0             ; <i8*> [#uses=2]
  store i8 -1, i8* %tmp41, align 1
  %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp43, align 2
  %tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp46, align 2
  %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp57, align 2
  %tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp60, align 2
  %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp71, align 2
  %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp74, align 2
  %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp85, align 2
  %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp88, align 2
  %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0               ; <i16*> [#uses=1]
  store i16 0, i16* %tmp99, align 2
  %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp102, align 2
  %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp113, align 2
  %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp116, align 2
  %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp127, align 2
  %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp130, align 2
  %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp141, align 8
  %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1              ; <i16*> [#uses=1]
  store i16 0, i16* %tmp144, align 2
  %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp148, align 2
  %tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp151, align 2
  %tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp162, align 2
  %tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp165, align 2
  %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp176, align 2
  %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp179, align 2
  %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp190, align 2
  %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp193, align 2
  %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp204, align 2
  %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp207, align 2
  %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp218, align 2
  %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp221, align 2
  %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp232, align 2
  %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp235, align 2
  %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp246, align 8
  %tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1            ; <i16*> [#uses=1]
  store i16 0, i16* %tmp249, align 2
  %up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0          ; <%struct.MV*> [#uses=1]
  %left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0              ; <%struct.MV*> [#uses=1]
  call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind
  ret void

; CHECK-LABEL: @test2(
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false)
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false)
; CHECK-NOT: store
; CHECK: ret
}

declare void @foo(%struct.MV*, %struct.MV*, i8*)


; Store followed by memset.
define void @test3(i32* nocapture %P) nounwind ssp {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
  store i32 0, i32* %arrayidx, align 4
  %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
  %0 = bitcast i32* %add.ptr to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
  ret void
; CHECK-LABEL: @test3(
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
}

; store followed by memset, different offset scenario
define void @test4(i32* nocapture %P) nounwind ssp {
entry:
  store i32 0, i32* %P, align 4
  %add.ptr = getelementptr inbounds i32, i32* %P, i64 1
  %0 = bitcast i32* %add.ptr to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
  ret void
; CHECK-LABEL: @test4(
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

; Memset followed by store.
define void @test5(i32* nocapture %P) nounwind ssp {
entry:
  %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
  %0 = bitcast i32* %add.ptr to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
  %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
  store i32 0, i32* %arrayidx, align 4
  ret void
; CHECK-LABEL: @test5(
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
}

;; Memset followed by memset.
define void @test6(i32* nocapture %P) nounwind ssp {
entry:
  %0 = bitcast i32* %P to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
  %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
  %1 = bitcast i32* %add.ptr to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
  ret void
; CHECK-LABEL: @test6(
; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
}

; More aggressive heuristic
; rdar://9892684
define void @test7(i32* nocapture %c) nounwind optsize {
  store i32 -1, i32* %c, align 4
  %1 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 -1, i32* %1, align 4
  %2 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 -1, i32* %2, align 4
  %3 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 -1, i32* %3, align 4
  %4 = getelementptr inbounds i32, i32* %c, i32 4
  store i32 -1, i32* %4, align 4
; CHECK-LABEL: @test7(
; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
  ret void
}

%struct.test8 = type { [4 x i32] }

define void @test8() {
entry:
  %memtmp = alloca %struct.test8, align 16
  %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
  store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
  ret void
; CHECK-LABEL: @test8(
; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
}

@test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16

define void @test9() nounwind {
  store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 3), align 1
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 4), align 4
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 5), align 1
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 6), align 2
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 7), align 1
  store i8 -1, i8* bitcast (i64* getelementptr inbounds ([16 x i64], [16 x i64]* @test9buf, i64 0, i64 1) to i8*), align 8
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 9), align 1
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 10), align 2
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 11), align 1
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 12), align 4
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 13), align 1
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2
  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
  ret void
; CHECK-LABEL: @test9(
; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
}

; PR19092
define void @test10(i8* nocapture %P) nounwind {
  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false)
  ret void
; CHECK-LABEL: @test10(
; CHECK-NOT: memset
; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
; CHECK-NOT: memset
; CHECK: ret void
}

; Memset followed by odd store.
define void @test11(i32* nocapture %P) nounwind ssp {
entry:
  %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
  %0 = bitcast i32* %add.ptr to i8*
  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i32 1, i1 false)
  %arrayidx = getelementptr inbounds i32, i32* %P, i64 0
  %arrayidx.cast = bitcast i32* %arrayidx to i96*
  store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4
  ret void
; CHECK-LABEL: @test11(
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 1, i64 23, i32 4, i1 false)
}