; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -vectorize-num-stores-pred=1 -enable-cond-stores-vec < %s | FileCheck %s

; The command line above forces a vector width of 2 with no interleaving of
; iterations, enables interleaved memory access vectorization, and enables
; vectorization of conditional stores.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Two adjacent i64 fields; every loop below indexes an array of these, so each
; field is accessed with a stride of two i64 elements.
%pair = type { i64, i64 }

; Ensure that we vectorize the interleaved load group even though the loop
; contains a conditional store. The store group contains gaps and is not
; vectorized.
;
; The directives below expect:
;   - a vector trip count of %n minus a remainder, where a remainder of zero is
;     replaced by 2 (the select), so the remainder is never zero;
;   - one <4 x i64> wide load whose lanes 0 and 2 are selected by the shuffle;
;   - two predicated blocks, each extracting one of those lanes and storing it
;     as a scalar.
;
; CHECK-LABEL: @interleaved_with_cond_store_0(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
; CHECK: store i64 %[[X2]], {{.*}}

; Scalar input loop: for each i, load p[i].1 and, only when it equals %x,
; store the loaded value back to p[i].1.
define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  ; Address of the second field of p[i].
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store: executed only when the loaded value equals %x.
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Ensure that we don't form a single interleaved group for the two loads. The
; conditional store prevents the second load from being hoisted. The two load
; groups are separately vectorized.
; The store group contains gaps and is not vectorized.
;
; The directives below expect:
;   - the same remainder computation as a plain %n & 1, except that a zero
;     remainder is replaced by 2 before being subtracted from %n;
;   - a first <4 x i64> wide load (captured as L1) whose lanes 0 and 2 feed the
;     shuffle and the two predicated scalar stores;
;   - after the predicated stores, a second <4 x i64> wide load (captured as
;     L2) whose lanes 0 and 2 are extracted and stored unconditionally.
; The predicated-store patterns refer to the first load through the L1 capture
; rather than a hard-coded value name, matching how the shuffle refers to it.
;
; CHECK-LABEL: @interleaved_with_cond_store_1(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %[[L1]], i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %[[L1]], i32 2
; CHECK: store i64 %[[X2]], {{.*}}
;
; CHECK: pred.store.continue
; CHECK: %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
; CHECK: %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
; CHECK: store i64 %[[X3]], {{.*}}
; CHECK: %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
; CHECK: store i64 %[[X4]], {{.*}}

; Scalar input loop: for each i, load p[i].1; when it equals %x, store it to
; p[i].0; then unconditionally load p[i].0 and store that value to p[i].1.
define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  ; Addresses of the first and second fields of p[i].
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store to p[i].0; it sits between the two loads of the loop.
  store i64 %0, i64* %p.0, align 8
  br label %if.merge

if.merge:
  ; Second load reads the field the conditional store may have just written.
  %2 = load i64, i64* %p.0, align 8
  store i64 %2, i64 *%p.1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Ensure that we don't create a single interleaved group for the two stores.
; The second store is conditional and we can't sink the first store inside the
; predicated block. The load group is vectorized, and the store groups contain
; gaps and are not vectorized.
;
; The directives below expect:
;   - a vector trip count of %n minus a remainder, where a remainder of zero is
;     replaced by 2 (the select);
;   - one <4 x i64> wide load (captured as L1) whose lanes 0 and 2 are selected
;     by the shuffle;
;   - two unconditional scalar stores of %x in the vector body;
;   - two predicated blocks, each extracting lane 0 or lane 2 of the wide load
;     and storing it as a scalar.
; The predicated-store patterns refer to the wide load through the L1 capture
; rather than a hard-coded value name, matching how the shuffle refers to it.
;
; CHECK-LABEL: @interleaved_with_cond_store_2(
;
; CHECK: min.iters.checked
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
;
; CHECK: vector.body:
; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
; CHECK: store i64 %x, {{.*}}
; CHECK: store i64 %x, {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X1:.+]] = extractelement <4 x i64> %[[L1]], i32 0
; CHECK: store i64 %[[X1]], {{.*}}
;
; CHECK: pred.store.if
; CHECK: %[[X2:.+]] = extractelement <4 x i64> %[[L1]], i32 2
; CHECK: store i64 %[[X2]], {{.*}}

; Scalar input loop: for each i, load p[i].1, unconditionally store %x to
; p[i].0, and, only when the loaded value equals %x, store it back to p[i].1.
define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  ; Addresses of the first and second fields of p[i].
  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %0 = load i64, i64* %p.1, align 8
  ; Unconditional store to the first field.
  store i64 %x, i64* %p.0, align 8
  %1 = icmp eq i64 %0, %x
  br i1 %1, label %if.then, label %if.merge

if.then:
  ; Conditional store to the second field.
  store i64 %0, i64* %p.1, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}