1; REQUIRES: asserts
2; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_UNMASKED
3; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_MASKED
4
5target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
6
7; We test here that the loop-vectorizer forms interleave-groups from
8; predicated memory accesses only if they are both in the same (predicated)
9; block (first scenario below).
10; If the accesses are not in the same predicated block, an interleave-group
11; is not formed (scenarios 2,3 below).
12
13; Scenario 1: Check the case where it is legal to create masked interleave-
14; groups. Altogether two groups are created (one for loads and one for stores)
15; when masked-interleaved-accesses are enabled. When masked-interleaved-accesses
16; are disabled we do not create any interleave-group.
17;
18; void masked_strided1(const unsigned char* restrict p,
19;                     unsigned char* restrict q,
20;                     unsigned char guard) {
21; for(ix=0; ix < 1024; ++ix) {
22;     if (ix > guard) {
23;         char left = p[2*ix];
24;         char right = p[2*ix + 1];
25;         char max = max(left, right);
26;         q[2*ix] = max;
27;         q[2*ix+1] = 0 - max;
28;     }
29; }
30;}
31
32
33; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided1"
34; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
35; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
36
37; STRIDED_MASKED: LV: Checking a loop in "masked_strided1"
38; STRIDED_MASKED: LV: Analyzing interleaved accesses...
39; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 %{{.*}}, i8* %{{.*}}, align 1
40; STRIDED_MASKED-NEXT: LV: Inserted:  store i8  %{{.*}}, i8* %{{.*}}, align 1
41; STRIDED_MASKED-NEXT:     into the interleave group with  store i8 %{{.*}}, i8* %{{.*}}, align 1
42; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:   %{{.*}} = load i8, i8* %{{.*}}, align 1
43; STRIDED_MASKED-NEXT: LV: Inserted:  %{{.*}} = load i8, i8* %{{.*}}, align 1
44; STRIDED_MASKED-NEXT:     into the interleave group with   %{{.*}} = load i8, i8* %{{.*}}, align 1
45
46; Scenario 2: Check the case where it is illegal to create a masked interleave-
47; group because the second access is predicated, and the first isn't.
48; We therefore create a separate interleave-group with gaps for each of the
49; stores (if masked-interleaved-accesses are enabled) and these are later
50; invalidated because interleave-groups of stores with gaps are not supported.
51; If masked-interleaved-accesses is not enabled we create only one interleave
52; group of stores (for the non-predicated store) and it is later invalidated
53; due to gaps.
54;
55; void masked_strided2(const unsigned char* restrict p,
56;                     unsigned char* restrict q,
57;                     unsigned char guard) {
58; for(ix=0; ix < 1024; ++ix) {
59;     q[2*ix] = 1;
60;     if (ix > guard) {
61;         q[2*ix+1] = 2;
62;     }
63; }
64;}
65
66
67; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided2"
68; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
69; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with:  store i8 1, i8* %{{.*}}, align 1
70; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
71; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
72
73; STRIDED_MASKED: LV: Checking a loop in "masked_strided2"
74; STRIDED_MASKED: LV: Analyzing interleaved accesses...
75; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, i8* %{{.*}}, align 1
76; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, i8* %{{.*}}, align 1
77; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
78; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
79
80
81; Scenario 3: Check the case where it is illegal to create a masked interleave-
82; group because the two accesses are in separate predicated blocks.
83; We therefore create a separate interleave-group with gaps for each of the accesses,
84; (which are later invalidated because interleave-groups of stores with gaps are
85; not supported).
86; If masked-interleaved-accesses is not enabled we don't create any interleave
87; group because all accesses are predicated.
88;
89; void masked_strided3(const unsigned char* restrict p,
90;                     unsigned char* restrict q,
91;                     unsigned char guard1,
92;                     unsigned char guard2) {
93; for(ix=0; ix < 1024; ++ix) {
94;     if (ix > guard1) {
95;         q[2*ix] = 1;
96;     }
97;     if (ix > guard2) {
98;         q[2*ix+1] = 2;
99;     }
100; }
101;}
102
103
104; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided3"
105; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
106; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
107
108; STRIDED_MASKED: LV: Checking a loop in "masked_strided3"
109; STRIDED_MASKED: LV: Analyzing interleaved accesses...
110; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 2, i8* %{{.*}}, align 1
111; STRIDED_MASKED-NEXT: LV: Creating an interleave group with:  store i8 1, i8* %{{.*}}, align 1
112; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
113; STRIDED_MASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
114
115
116; ModuleID = 'test.c'
117source_filename = "test.c"
118target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
119target triple = "i386-unknown-linux-gnu"
120
; Scenario 1 input. Both loads from %p and both stores to %q live in the one
; conditional block %if.then, at the even (%mul) and odd (%add) byte offsets.
121define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
122entry:
123  %conv = zext i8 %guard to i32 ; widen guard to i32 for the compare in the loop
124  br label %for.body
125
126for.body:
127  %ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; ix: 0 on entry, %inc afterwards
128  %cmp1 = icmp ugt i32 %ix.024, %conv ; unsigned ix > guard
129  br i1 %cmp1, label %if.then, label %for.inc
130
131if.then:
132  %mul = shl nuw nsw i32 %ix.024, 1 ; 2*ix
133  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
134  %0 = load i8, i8* %arrayidx, align 1 ; p[2*ix]
135  %add = or i32 %mul, 1 ; 2*ix+1 (low bit of %mul is clear after the shift)
136  %arrayidx4 = getelementptr inbounds i8, i8* %p, i32 %add
137  %1 = load i8, i8* %arrayidx4, align 1 ; p[2*ix+1]
138  %cmp.i = icmp slt i8 %0, %1
139  %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0 ; signed max of the two loaded bytes
140  %arrayidx6 = getelementptr inbounds i8, i8* %q, i32 %mul
141  store i8 %spec.select.i, i8* %arrayidx6, align 1 ; q[2*ix] = max
142  %sub = sub i8 0, %spec.select.i ; 0 - max
143  %arrayidx11 = getelementptr inbounds i8, i8* %q, i32 %add
144  store i8 %sub, i8* %arrayidx11, align 1 ; q[2*ix+1] = 0 - max
145  br label %for.inc
146
147for.inc:
148  %inc = add nuw nsw i32 %ix.024, 1
149  %exitcond = icmp eq i32 %inc, 1024 ; leave the loop once ix+1 reaches 1024
150  br i1 %exitcond, label %for.end, label %for.body
151
152for.end:
153  ret void
154}
155
156
; Scenario 2 input. The store of 1 to q[2*ix] runs unconditionally in
; %for.body; the store of 2 to q[2*ix+1] is guarded by %cmp1 in %if.then.
; %p is not referenced in the body.
157define dso_local void @masked_strided2(i8* noalias nocapture readnone %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
158entry:
159  %conv = zext i8 %guard to i32 ; widen guard to i32 for the compare in the loop
160  br label %for.body
161
162for.body:
163  %ix.012 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; ix: 0 on entry, %inc afterwards
164  %mul = shl nuw nsw i32 %ix.012, 1 ; 2*ix
165  %arrayidx = getelementptr inbounds i8, i8* %q, i32 %mul
166  store i8 1, i8* %arrayidx, align 1 ; q[2*ix] = 1, not predicated
167  %cmp1 = icmp ugt i32 %ix.012, %conv ; unsigned ix > guard
168  br i1 %cmp1, label %if.then, label %for.inc
169
170if.then:
171  %add = or i32 %mul, 1 ; 2*ix+1 (low bit of %mul is clear after the shift)
172  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %add
173  store i8 2, i8* %arrayidx3, align 1 ; q[2*ix+1] = 2, only when ix > guard
174  br label %for.inc
175
176for.inc:
177  %inc = add nuw nsw i32 %ix.012, 1
178  %exitcond = icmp eq i32 %inc, 1024 ; leave the loop once ix+1 reaches 1024
179  br i1 %exitcond, label %for.end, label %for.body
180
181for.end:
182  ret void
183}
184
185
; Scenario 3 input. The two stores to %q sit in different conditional blocks:
; %if.then (guarded by ix > guard1) and %if.then6 (guarded by ix > guard2).
; %p is not referenced in the body.
186define dso_local void @masked_strided3(i8* noalias nocapture readnone %p, i8* noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
187entry:
188  %conv = zext i8 %guard1 to i32 ; widen guard1 to i32
189  %conv3 = zext i8 %guard2 to i32 ; widen guard2 to i32
190  br label %for.body
191
192for.body:
193  %ix.018 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; ix: 0 on entry, %inc afterwards
194  %mul = shl nuw nsw i32 %ix.018, 1 ; 2*ix, used by both conditional blocks
195  %cmp1 = icmp ugt i32 %ix.018, %conv ; unsigned ix > guard1
196  br i1 %cmp1, label %if.then, label %if.end
197
198if.then:
199  %arrayidx = getelementptr inbounds i8, i8* %q, i32 %mul
200  store i8 1, i8* %arrayidx, align 1 ; q[2*ix] = 1, only when ix > guard1
201  br label %if.end
202
203if.end:
204  %cmp4 = icmp ugt i32 %ix.018, %conv3 ; unsigned ix > guard2
205  br i1 %cmp4, label %if.then6, label %for.inc
206
207if.then6:
208  %add = or i32 %mul, 1 ; 2*ix+1 (low bit of %mul is clear after the shift)
209  %arrayidx7 = getelementptr inbounds i8, i8* %q, i32 %add
210  store i8 2, i8* %arrayidx7, align 1 ; q[2*ix+1] = 2, only when ix > guard2
211  br label %for.inc
212
213for.inc:
214  %inc = add nuw nsw i32 %ix.018, 1
215  %exitcond = icmp eq i32 %inc, 1024 ; leave the loop once ix+1 reaches 1024
216  br i1 %exitcond, label %for.end, label %for.body
217
218for.end:
219  ret void
220}
221
; Attribute group #0 is referenced by all three functions in this file and
; supplies their "target-features" string.
222attributes #0 = {  "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"  }
223