; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Eight consecutive bytes / eight consecutive 32-bit words; the functions in
; this file store to (or copy between) the leading fields of these structs.
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }

; Each loop iteration stores the constants 1..8 to the eight consecutive i8
; fields of %struct.A. The checks expect them to be saved as one big integer:
; 578437695752307201 == 0x0807060504030201.
; CHECK-LABEL: merge_const_store:
; save 1,2,3 ... as one big integer.
; CHECK: movabsq $578437695752307201
; CHECK: ret
define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
  store i8 5, i8* %6, align 1
  %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  ; Advance the induction variable and step %p to the next struct.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; No vectors because we use
; noimplicitfloat: the eight i32 zero stores below must not become a vector
; store (vmovups).
; CHECK-LABEL: merge_const_store_no_vec:
; CHECK-NOT: vmovups
; CHECK: ret
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  ; Advance the induction variable and step %p to the next struct.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the constants using a single vector store.
; Same eight i32 zero stores to %struct.B as merge_const_store_no_vec, but
; without noimplicitfloat, so a vector store (vmovups) is expected.
; CHECK-LABEL: merge_const_store_vec:
; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
  store i32 0, i32* %2, align 4
  %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
  store i32 0, i32* %3, align 4
  %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
  store i32 0, i32* %4, align 4
  %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
  store i32 0, i32* %5, align 4
  %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
  store i32 0, i32* %6, align 4
  %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
  store i32 0, i32* %7, align 4
  %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
  store i32 0, i32* %8, align 4
  %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
  store i32 0, i32* %9, align 4
  ; Advance the induction variable and step %p to the next struct.
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}

; Move the first 4 constants as a single 32-bit integer store. Move the rest
; as individual byte stores.
; The value stored to field 4 is the non-constant argument %zz. The checks
; expect fields 0..3 as one 32-bit immediate (67305985 == 0x04030201) followed
; by four byte stores.
; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph:
  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
  %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
  store i8 1, i8* %2, align 1
  %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
  store i8 2, i8* %3, align 1
  %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
  store i8 3, i8* %4, align 1
  %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
  store i8 4, i8* %5, align 1
  %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
  %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
  store i8 6, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
  store i8 7, i8* %8, align 1
  %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
  store i8 8, i8* %9, align 1
  %10 = add nsw i32 %i.02, 1
  %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %10, %count
  br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
  ret void
}


; Two adjacent i8 loads from %q followed by two adjacent i8 stores to %p; the
; checks expect one 16-bit load and one 16-bit store.
; CHECK-LABEL: merge_loads_i16:
; load:
; CHECK: movw
; store:
; CHECK: movw
; CHECK: ret
define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i8, i8* %2, align 1
  %6 = load i8, i8* %3, align 1
  %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
  store i8 %5, i8* %7, align 1
  %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
  store i8 %6, i8* %8, align 1
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}

; The loads and the stores are interleaved. Can't merge them.
; CHECK-LABEL: no_merge_loads:
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: movb
; CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
  br label %a4

a4:                                               ; preds = %a4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
  %a5 = load i8, i8* %2, align 1
  %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
  store i8 %a5, i8* %a7, align 1
  %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
  ; Second load is issued after the first store (load/store/load/store).
  %a6 = load i8, i8* %3, align 1
  store i8 %a6, i8* %a8, align 1
  %a9 = add nsw i32 %i.02, 1
  %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
  %exitcond = icmp eq i32 %a9, %count
  br i1 %exitcond, label %._crit_edge, label %a4

._crit_edge:                                      ; preds = %a4, %0
  ret void
}


; Two adjacent i32 loads from %q followed by two adjacent i32 stores to %p; the
; checks expect one 64-bit load and one 64-bit store.
; CHECK-LABEL: merge_loads_integer:
; load:
; CHECK: movq
; store:
; CHECK: movq
; CHECK: ret
define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %1 = icmp sgt i32 %count, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
  %3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
  br label %4

; <label>:4                                       ; preds = %4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
  %5 = load i32, i32* %2
  %6 = load i32, i32* %3
  %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
  store i32 %5, i32* %7
  %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
  store i32 %6, i32* %8
  %9 = add nsw i32 %i.02, 1
  %10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %9, %count
  br i1 %exitcond, label %._crit_edge, label %4

._crit_edge:                                      ; preds = %4, %0
  ret void
}


; Four adjacent i32 loads from %q followed by four adjacent i32 stores to %p;
; the checks expect a vector load and a vector store.
; CHECK-LABEL: merge_loads_vector:
; load:
; CHECK: movups
; store:
; CHECK: movups
; CHECK: ret
define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
  %b1 = load i32, i32* %a2
  %b2 = load i32, i32* %a3
  %b3 = load i32, i32* %a4
  %b4 = load i32, i32* %a5
  store i32 %b1, i32* %a7
  store i32 %b2, i32* %a8
  store i32 %b3, i32* %a9
  store i32 %b4, i32* %a10
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Same access pattern as merge_loads_vector, but every load and store is
; marked "align 1"; the checks expect four scalar loads and four scalar stores.
; CHECK-LABEL: merge_loads_no_align:
; load:
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: movl
; store:
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: movl
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
  %a1 = icmp sgt i32 %count, 0
  br i1 %a1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0
  %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
  %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
  %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
  %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
  br label %block4

block4:                                           ; preds = %block4, %.lr.ph
  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
  %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
  %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
  %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
  %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
  %b1 = load i32, i32* %a2, align 1
  %b2 = load i32, i32* %a3, align 1
  %b3 = load i32, i32* %a4, align 1
  %b4 = load i32, i32* %a5, align 1
  store i32 %b1, i32* %a7, align 1
  store i32 %b2, i32* %a8, align 1
  store i32 %b3, i32* %a9, align 1
  store i32 %b4, i32* %a10, align 1
  %c9 = add nsw i32 %i.02, 1
  %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
  %exitcond = icmp eq i32 %c9, %count
  br i1 %exitcond, label %._crit_edge, label %block4

._crit_edge:                                      ; preds = %block4, %0
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw    [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
  %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i64, i64* %.0, i64 1
  ; %3 is an index read from %a; bytes %c[%3] and %c[%3 + 1] are copied to %b.
  %3 = load i64, i64* %.0, align 1
  %4 = getelementptr inbounds i8, i8* %c, i64 %3
  %5 = load i8, i8* %4, align 1
  %6 = add i64 %3, 1
  %7 = getelementptr inbounds i8, i8* %c, i64 %6
  %8 = load i8, i8* %7, align 1
  store i8 %5, i8* %.08, align 1
  %9 = getelementptr inbounds i8, i8* %.08, i64 1
  store i8 %8, i8* %9, align 1
  %10 = getelementptr inbounds i8, i8* %.08, i64 2
  %11 = add nsw i32 %.09, -1
  %12 = icmp eq i32 %11, 0
  br i1 %12, label %13, label %1

; <label>:13
  ret void
}

; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
; The i8 index loaded from %a is sign-extended to i64 once, and both byte
; addresses (%c[%4] and %c[%4 + 1]) are computed from that extended value.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw    [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8, i8* %.0, i64 1
  %3 = load i8, i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8, i8* %c, i64 %4
  %6 = load i8, i8* %5, align 1
  %7 = add i64 %4, 1
  %8 = getelementptr inbounds i8, i8* %c, i64 %7
  %9 = load i8, i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8, i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8, i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; However, we can only ignore sign extensions while merging when they are
; applied to all of the memory address computations. Here the "+ 1" is done in
; i8 before the second sign extension, so the word-sized copy must not appear.
; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
; CHECK-NOT: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK-NOT: movw    [[REG]], (%{{.*}})
define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
  br label %1

; <label>:1
  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
  %2 = getelementptr inbounds i8, i8* %.0, i64 1
  %3 = load i8, i8* %.0, align 1
  %4 = sext i8 %3 to i64
  %5 = getelementptr inbounds i8, i8* %c, i64 %4
  %6 = load i8, i8* %5, align 1
  ; The increment happens in i8 (no nsw flag) and only then is extended.
  %7 = add i8 %3, 1
  %wrap.4 = sext i8 %7 to i64
  %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
  %9 = load i8, i8* %8, align 1
  store i8 %6, i8* %.08, align 1
  %10 = getelementptr inbounds i8, i8* %.08, i64 1
  store i8 %9, i8* %10, align 1
  %11 = getelementptr inbounds i8, i8* %.08, i64 2
  %12 = add nsw i32 %.09, -1
  %13 = icmp eq i32 %12, 0
  br i1 %13, label %14, label %1

; <label>:14
  ret void
}

; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
; All eight elements of %v are extracted and stored to eight consecutive float
; slots starting at %ptr; the checks expect a single vmovups.
define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
  %vecext0 = extractelement <8 x float> %v, i32 0
  %vecext1 = extractelement <8 x float> %v, i32 1
  %vecext2 = extractelement <8 x float> %v, i32 2
  %vecext3 = extractelement <8 x float> %v, i32 3
  %vecext4 = extractelement <8 x float> %v, i32 4
  %vecext5 = extractelement <8 x float> %v, i32 5
  %vecext6 = extractelement <8 x float> %v, i32 6
  %vecext7 = extractelement <8 x float> %v, i32 7
  %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
  %arrayidx2 = getelementptr inbounds float, float* %ptr, i64 2
  %arrayidx3 = getelementptr inbounds float, float* %ptr, i64 3
  %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 4
  %arrayidx5 = getelementptr inbounds float, float* %ptr, i64 5
  %arrayidx6 = getelementptr inbounds float, float* %ptr, i64 6
  %arrayidx7 = getelementptr inbounds float, float* %ptr, i64 7
  store float %vecext0, float* %ptr, align 4
  store float %vecext1, float* %arrayidx1, align 4
  store float %vecext2, float* %arrayidx2, align 4
  store float %vecext3, float* %arrayidx3, align 4
  store float %vecext4, float* %arrayidx4, align 4
  store float %vecext5, float* %arrayidx5, align 4
  store float %vecext6, float* %arrayidx6, align 4
  store float %vecext7, float* %arrayidx7, align 4
  ret void

; CHECK-LABEL: merge_vec_element_store:
; CHECK: vmovups
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}

; This is a minimized test based on real code that was failing.
; We could merge stores (and loads) like this...

; Copies array[0] to array[4] with a scalar i64 load, and element 0 of a
; <2 x i64> load at &array[1] to array[5]. The checks expect four plain movq
; instructions: the scalar and vector-element accesses stay separate.
define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
  %idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
  %idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
  %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
  %idx5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5

  %a0 = load i64, i64* %idx0, align 8
  store i64 %a0, i64* %idx4, align 8

  %b = bitcast i64* %idx1 to <2 x i64>*
  %v = load <2 x i64>, <2 x i64>* %b, align 8
  %a1 = extractelement <2 x i64> %v, i32 0
  store i64 %a1, i64* %idx5, align 8
  ret void

; CHECK-LABEL: merge_vec_element_and_scalar_load:
; CHECK:      movq	(%rdi), %rax
; CHECK-NEXT: movq	%rax, 32(%rdi)
; CHECK-NEXT: movq	8(%rdi), %rax
; CHECK-NEXT: movq	%rax, 40(%rdi)
; CHECK-NEXT: retq
}