; REQUIRES: asserts
; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_UNMASKED
; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_MASKED

target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"

; We test here that the loop-vectorizer forms interleave-groups from
; predicated memory accesses only if they are both in the same (predicated)
; block (first scenario below).
; If the accesses are not in the same predicated block, an interleave-group
; is not formed (scenarios 2 and 3 below).

; Scenario 1: Check the case where it is legal to create masked interleave-
; groups. Altogether two groups are created (one for loads and one for stores)
; when masked-interleaved-accesses are enabled. When masked-interleaved-accesses
; are disabled we do not create any interleave-group.
;
; void masked_strided1(const unsigned char* restrict p,
;                      unsigned char* restrict q,
;                      unsigned char guard) {
;   for(ix=0; ix < 1024; ++ix) {
;     if (ix > guard) {
;         char left = p[2*ix];
;         char right = p[2*ix + 1];
;         char max = max(left, right);
;         q[2*ix] = max;
;         q[2*ix+1] = 0 - max;
;     }
;   }
;}


; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided1"
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group

; STRIDED_MASKED: LV: Checking a loop in "masked_strided1"
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 %{{.*}}, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Inserted: store i8 %{{.*}}, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: into the interleave group with store i8 %{{.*}}, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: %{{.*}} = load i8, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Inserted: %{{.*}} = load i8, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: into the interleave group with %{{.*}} = load i8, i8* %{{.*}}, align 1

; Scenario 2: Check the case where it is illegal to create a masked interleave-
; group because the first access is not predicated, and the second is.
; We therefore create a separate interleave-group with gaps for each of the
; stores (if masked-interleaved-accesses are enabled) and these are later
; invalidated because interleave-groups of stores with gaps are not supported.
; If masked-interleaved-accesses is not enabled we create only one interleave
; group of stores (for the non-predicated store) and it is later invalidated
; due to gaps.
;
; void masked_strided2(const unsigned char* restrict p,
;                      unsigned char* restrict q,
;                      unsigned char guard) {
;   for(ix=0; ix < 1024; ++ix) {
;     q[2*ix] = 1;
;     if (ix > guard) {
;         q[2*ix+1] = 2;
;     }
;   }
;}

; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided2"
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with: store i8 1, i8* %{{.*}}, align 1
; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group

; STRIDED_MASKED: LV: Checking a loop in "masked_strided2"
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.


; Scenario 3: Check the case where it is illegal to create a masked interleave-
; group because the two accesses are in separate predicated blocks.
; We therefore create a separate interleave-group with gaps for each of the
; accesses (which are later invalidated because interleave-groups of stores
; with gaps are not supported).
; If masked-interleaved-accesses is not enabled we don't create any interleave
; group because all accesses are predicated.
;
; void masked_strided3(const unsigned char* restrict p,
;                      unsigned char* restrict q,
;                      unsigned char guard1,
;                      unsigned char guard2) {
;   for(ix=0; ix < 1024; ++ix) {
;     if (ix > guard1) {
;         q[2*ix] = 1;
;     }
;     if (ix > guard2) {
;         q[2*ix+1] = 2;
;     }
;   }
;}


; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided3"
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group

; STRIDED_MASKED: LV: Checking a loop in "masked_strided3"
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, i8* %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

; Loop over ix in [0, 1024). When ix > zext(guard), a single predicated block
; loads p[2*ix] and p[2*ix+1], stores their signed maximum to q[2*ix] and the
; negated maximum to q[2*ix+1]. All four strided accesses share that block.
define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
  %guard.wide = zext i8 %guard to i32
  br label %loop.header

loop.header:
  %ix = phi i32 [ 0, %entry ], [ %ix.next, %loop.latch ]
  %above.guard = icmp ugt i32 %ix, %guard.wide
  br i1 %above.guard, label %guarded, label %loop.latch

guarded:
  ; even.idx = 2*ix; odd.idx = 2*ix + 1 (the low bit is known clear, so `or` adds 1).
  %even.idx = shl nuw nsw i32 %ix, 1
  %p.even = getelementptr inbounds i8, i8* %p, i32 %even.idx
  %left = load i8, i8* %p.even, align 1
  %odd.idx = or i32 %even.idx, 1
  %p.odd = getelementptr inbounds i8, i8* %p, i32 %odd.idx
  %right = load i8, i8* %p.odd, align 1
  ; Signed max of the two loaded bytes.
  %left.lt.right = icmp slt i8 %left, %right
  %max = select i1 %left.lt.right, i8 %right, i8 %left
  %q.even = getelementptr inbounds i8, i8* %q, i32 %even.idx
  store i8 %max, i8* %q.even, align 1
  %neg.max = sub i8 0, %max
  %q.odd = getelementptr inbounds i8, i8* %q, i32 %odd.idx
  store i8 %neg.max, i8* %q.odd, align 1
  br label %loop.latch

loop.latch:
  %ix.next = add nuw nsw i32 %ix, 1
  %done = icmp eq i32 %ix.next, 1024
  br i1 %done, label %exit, label %loop.header

exit:
  ret void
}


; Loop over ix in [0, 1024). The store of 1 to q[2*ix] is executed on every
; iteration (it lives in the loop header), while the store of 2 to q[2*ix+1]
; is executed only when ix > zext(guard). The two strided stores are therefore
; in different blocks: one unpredicated, one predicated.
define dso_local void @masked_strided2(i8* noalias nocapture readnone %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
  %guard.wide = zext i8 %guard to i32
  br label %loop.header

loop.header:
  %ix = phi i32 [ 0, %entry ], [ %ix.next, %loop.latch ]
  %even.idx = shl nuw nsw i32 %ix, 1
  %q.even = getelementptr inbounds i8, i8* %q, i32 %even.idx
  ; Unconditional store to the even element.
  store i8 1, i8* %q.even, align 1
  %above.guard = icmp ugt i32 %ix, %guard.wide
  br i1 %above.guard, label %guarded, label %loop.latch

guarded:
  %odd.idx = or i32 %even.idx, 1
  %q.odd = getelementptr inbounds i8, i8* %q, i32 %odd.idx
  ; Predicated store to the odd element.
  store i8 2, i8* %q.odd, align 1
  br label %loop.latch

loop.latch:
  %ix.next = add nuw nsw i32 %ix, 1
  %done = icmp eq i32 %ix.next, 1024
  br i1 %done, label %exit, label %loop.header

exit:
  ret void
}


; Loop over ix in [0, 1024). The store of 1 to q[2*ix] is guarded by
; ix > zext(guard1) and the store of 2 to q[2*ix+1] is guarded by
; ix > zext(guard2), so the two strided stores sit in separate predicated
; blocks.
define dso_local void @masked_strided3(i8* noalias nocapture readnone %p, i8* noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
entry:
  %guard1.wide = zext i8 %guard1 to i32
  %guard2.wide = zext i8 %guard2 to i32
  br label %loop.header

loop.header:
  %ix = phi i32 [ 0, %entry ], [ %ix.next, %loop.latch ]
  %even.idx = shl nuw nsw i32 %ix, 1
  %above.guard1 = icmp ugt i32 %ix, %guard1.wide
  br i1 %above.guard1, label %guarded.even, label %check.guard2

guarded.even:
  %q.even = getelementptr inbounds i8, i8* %q, i32 %even.idx
  store i8 1, i8* %q.even, align 1
  br label %check.guard2

check.guard2:
  %above.guard2 = icmp ugt i32 %ix, %guard2.wide
  br i1 %above.guard2, label %guarded.odd, label %loop.latch

guarded.odd:
  %odd.idx = or i32 %even.idx, 1
  %q.odd = getelementptr inbounds i8, i8* %q, i32 %odd.idx
  store i8 2, i8* %q.odd, align 1
  br label %loop.latch

loop.latch:
  %ix.next = add nuw nsw i32 %ix, 1
  %done = icmp eq i32 %ix.next, 1024
  br i1 %done, label %exit, label %loop.header

exit:
  ret void
}

attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }